# NOTE: removed Hugging Face Spaces page banner ("Spaces: / Paused / Paused")
# that was captured with this file — it is page scrape residue, not source code.
| """ | |
| ShortSmith v2 - Gradio Application | |
| Hugging Face Space interface for video highlight extraction. | |
| Features: | |
| - Multi-modal analysis (visual + audio + motion) | |
| - Domain-optimized presets | |
| - Person-specific filtering (optional) | |
| - Scene-aware clip cutting | |
| """ | |
| import os | |
| import sys | |
| import tempfile | |
| import shutil | |
| from pathlib import Path | |
| import time | |
| import traceback | |
| import gradio as gr | |
| # Add project root to path | |
| sys.path.insert(0, str(Path(__file__).parent)) | |
| # Initialize logging | |
| try: | |
| from utils.logger import setup_logging, get_logger | |
| setup_logging(log_level="INFO", log_to_console=True) | |
| logger = get_logger("app") | |
| except Exception: | |
| import logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger("app") | |
def process_video(
    video_file,
    domain,
    num_clips,
    clip_duration,
    reference_image,
    custom_prompt,
    progress=gr.Progress()
):
    """
    Main video processing function.

    Args:
        video_file: Uploaded video file path.
        domain: UI label for the content domain, mapped to internal scoring weights.
        num_clips: Number of clips to extract.
        clip_duration: Target duration of each clip in seconds.
        reference_image: Optional reference image path for person filtering.
        custom_prompt: Optional custom instructions forwarded to the pipeline.
        progress: Gradio progress tracker.

    Returns:
        Tuple of (status_message, clip1, clip2, clip3, log_text) where each
        clip slot is a file path string or None.
    """
    if video_file is None:
        return "Please upload a video first.", None, None, None, ""

    log_messages = []

    def log(msg):
        # Mirror every message to the UI log panel and the server logger.
        log_messages.append(f"[{time.strftime('%H:%M:%S')}] {msg}")
        logger.info(msg)

    # Track the pipeline instance so the finally block can release its
    # resources even when an unexpected exception escapes mid-run.
    # (Previously cleanup only happened on the success/failure return paths,
    # leaking models/temp files on unexpected errors.)
    pipeline = None
    try:
        video_path = Path(video_file)
        log(f"Processing video: {video_path.name}")
        progress(0.05, desc="Validating video...")

        # Deferred imports keep heavy pipeline dependencies off app start-up.
        from utils.helpers import validate_video_file, validate_image_file, format_duration
        from pipeline.orchestrator import PipelineOrchestrator

        # Validate video before doing any expensive work.
        validation = validate_video_file(video_file)
        if not validation.is_valid:
            return f"Error: {validation.error_message}", None, None, None, "\n".join(log_messages)
        log(f"Video size: {validation.file_size / (1024*1024):.1f} MB")

        # Validate reference image if provided; an invalid image is non-fatal
        # (we just skip person filtering and note it in the log).
        ref_path = None
        if reference_image is not None:
            ref_validation = validate_image_file(reference_image)
            if ref_validation.is_valid:
                ref_path = reference_image
                log(f"Reference image: {Path(reference_image).name}")
            else:
                log(f"Warning: Invalid reference image - {ref_validation.error_message}")

        # Map the UI label to the pipeline's internal domain value;
        # unknown labels fall back to "general".
        domain_map = {
            "Sports": "sports",
            "Vlogs": "vlogs",
            "Music Videos": "music",
            "Podcasts": "podcasts",
            "Gaming": "gaming",
            "General": "general",
        }
        domain_value = domain_map.get(domain, "general")
        log(f"Domain: {domain_value}")

        # Fresh temp directory per request so concurrent runs don't collide.
        output_dir = Path(tempfile.mkdtemp(prefix="shortsmith_output_"))
        log(f"Output directory: {output_dir}")

        def on_progress(pipeline_progress):
            # Bridge pipeline progress events into the Gradio progress bar.
            stage = pipeline_progress.stage.value
            pct = pipeline_progress.progress
            msg = pipeline_progress.message
            log(f"[{stage}] {msg}")
            # Map pipeline progress (0-1) into our reserved 0.1-0.9 range.
            mapped_progress = 0.1 + (pct * 0.8)
            progress(mapped_progress, desc=f"{stage}: {msg}")

        progress(0.1, desc="Initializing AI models...")
        log("Initializing pipeline...")
        pipeline = PipelineOrchestrator(progress_callback=on_progress)

        progress(0.15, desc="Starting analysis...")
        log(f"Processing: {int(num_clips)} clips @ {int(clip_duration)}s each")
        result = pipeline.process(
            video_path=video_path,
            num_clips=int(num_clips),
            clip_duration=float(clip_duration),
            domain=domain_value,
            reference_image=ref_path,
            # Normalize empty and whitespace-only prompts to None.
            custom_prompt=(custom_prompt.strip() or None) if custom_prompt else None,
        )
        progress(0.9, desc="Extracting clips...")

        if not result.success:
            log(f"Processing failed: {result.error_message}")
            return f"Error: {result.error_message}", None, None, None, "\n".join(log_messages)

        log(f"Processing complete in {result.processing_time:.1f}s")

        # Copy clips out of the pipeline's workspace so cleanup can't delete
        # files the UI is still serving.
        clip_paths = []
        for i, clip in enumerate(result.clips):
            if clip.clip_path.exists():
                output_path = output_dir / f"highlight_{i+1}.mp4"
                shutil.copy2(clip.clip_path, output_path)
                clip_paths.append(str(output_path))
                log(f"Clip {i+1}: {format_duration(clip.start_time)} - {format_duration(clip.end_time)} (score: {clip.hype_score:.2f})")

        status = f"Successfully extracted {len(clip_paths)} highlight clips!\nProcessing time: {result.processing_time:.1f}s"
        progress(1.0, desc="Done!")

        # The UI exposes exactly three clip slots; pad with None.
        slots = clip_paths[:3] + [None] * (3 - min(len(clip_paths), 3))
        return status, slots[0], slots[1], slots[2], "\n".join(log_messages)
    except Exception as e:
        error_msg = f"Unexpected error: {str(e)}"
        log(error_msg)
        log(traceback.format_exc())
        logger.exception("Pipeline error")
        return error_msg, None, None, None, "\n".join(log_messages)
    finally:
        # Always release pipeline resources, even on unexpected errors.
        if pipeline is not None:
            try:
                pipeline.cleanup()
            except Exception:
                logger.warning("Pipeline cleanup failed", exc_info=True)
# ---------------------------------------------------------------------------
# Gradio interface: two-column layout (inputs left, outputs right) wired to
# process_video. Component creation order defines the page layout.
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="ShortSmith v2",
    theme=gr.themes.Soft(),
    css="""
    .container { max-width: 1200px; margin: auto; }
    .output-video { min-height: 200px; }
    """
) as demo:
    # Page header
    gr.Markdown("""
    # 🎬 ShortSmith v2
    ### AI-Powered Video Highlight Extractor
    Upload a video and automatically extract the most engaging highlight clips using AI analysis.
    """)
    with gr.Row():
        # Left column - Inputs
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Input")
            video_input = gr.Video(
                label="Upload Video",
                sources=["upload"],
            )
            with gr.Accordion("⚙️ Settings", open=True):
                # Domain label is mapped to internal scoring weights by
                # process_video's domain_map.
                domain_dropdown = gr.Dropdown(
                    choices=["Sports", "Vlogs", "Music Videos", "Podcasts", "Gaming", "General"],
                    value="General",
                    label="Content Domain",
                    info="Select the type of content for optimized scoring"
                )
                with gr.Row():
                    num_clips_slider = gr.Slider(
                        minimum=1,
                        maximum=3,
                        value=3,
                        step=1,
                        label="Number of Clips",
                        info="How many highlight clips to extract"
                    )
                    duration_slider = gr.Slider(
                        minimum=5,
                        maximum=30,
                        value=15,
                        step=1,
                        label="Clip Duration (seconds)",
                        info="Target duration for each clip"
                    )
            with gr.Accordion("👤 Person Filtering (Optional)", open=False):
                # type="filepath" hands process_video a path string rather
                # than a decoded image array.
                reference_image = gr.Image(
                    label="Reference Image",
                    type="filepath",
                    sources=["upload"],
                )
                gr.Markdown("*Upload a photo of a person to prioritize clips featuring them.*")
            with gr.Accordion("📝 Custom Instructions (Optional)", open=False):
                custom_prompt = gr.Textbox(
                    label="Additional Instructions",
                    placeholder="E.g., 'Focus on crowd reactions' or 'Prioritize action scenes'",
                    lines=2,
                )
            process_btn = gr.Button(
                "🚀 Extract Highlights",
                variant="primary",
                size="lg"
            )
        # Right column - Outputs
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Output")
            status_output = gr.Textbox(
                label="Status",
                lines=2,
                interactive=False
            )
            gr.Markdown("#### Extracted Clips")
            # Exactly three fixed clip slots; process_video returns None for
            # any slot it does not fill.
            clip1_output = gr.Video(label="Clip 1", elem_classes=["output-video"])
            clip2_output = gr.Video(label="Clip 2", elem_classes=["output-video"])
            clip3_output = gr.Video(label="Clip 3", elem_classes=["output-video"])
            with gr.Accordion("📋 Processing Log", open=True):
                log_output = gr.Textbox(
                    label="Log",
                    lines=10,
                    interactive=False,
                    show_copy_button=True
                )
    # Footer
    gr.Markdown("""
    ---
    **ShortSmith v2** | Powered by Qwen2-VL, InsightFace, and Librosa |
    [GitHub](https://github.com) | Built with Gradio
    """)
    # Connect the button to the processing function; input order must match
    # process_video's positional parameters, output order its return tuple.
    process_btn.click(
        fn=process_video,
        inputs=[
            video_input,
            domain_dropdown,
            num_clips_slider,
            duration_slider,
            reference_image,
            custom_prompt
        ],
        outputs=[
            status_output,
            clip1_output,
            clip2_output,
            clip3_output,
            log_output
        ],
        show_progress="full"
    )
# Request queueing is wanted in both launch modes, so enable it once up front.
demo.queue()

if __name__ == "__main__":
    # Direct execution (local dev / container): bind all interfaces on the
    # conventional Spaces port and surface errors in the UI.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
else:
    # Imported by the HuggingFace Spaces runtime: use default launch settings.
    demo.launch()