# Hugging Face page header (not part of the program):
# dev_caio / app.py
# Chaitanya-02's picture
# Update app.py
# 526834b verified
"""
ShortSmith v2 - Gradio Application
Hugging Face Space interface for video highlight extraction.
Features:
- Multi-modal analysis (visual + audio + motion)
- Domain-optimized presets
- Person-specific filtering (optional)
- Scene-aware clip cutting
"""
import os
import sys
import tempfile
import shutil
from pathlib import Path
import time
import traceback
import gradio as gr
# Add project root to path so imports are resolved relative to this file's directory
sys.path.insert(0, str(Path(__file__).parent))

# Initialize logging
# Prefer the project's logging helpers; if they cannot be imported or fail
# during setup, fall back to the standard library so the app still starts.
try:
    from utils.logger import setup_logging, get_logger

    setup_logging(log_level="INFO", log_to_console=True)
    logger = get_logger("app")
except Exception as exc:
    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("app")
    # Record why the fallback was taken instead of swallowing the error silently.
    logger.warning(
        "Project logging unavailable (%s: %s); using basic logging",
        type(exc).__name__,
        exc,
    )
def process_video(
    video_file,
    domain,
    num_clips,
    clip_duration,
    reference_image,
    custom_prompt,
    progress=gr.Progress()
):
    """
    Main video processing function.

    Validates the upload, runs the highlight pipeline and copies the
    resulting clip files into a fresh temporary directory.

    Args:
        video_file: Uploaded video file path
        domain: Content domain for scoring weights (UI label; labels not in
            the internal map fall back to "general")
        num_clips: Number of clips to extract
        clip_duration: Duration of each clip in seconds
        reference_image: Optional reference image for person filtering;
            an image that fails validation is ignored with a log warning
        custom_prompt: Optional custom instructions; empty or
            whitespace-only text is passed to the pipeline as None
        progress: Gradio progress tracker

    Returns:
        Tuple of (status_message, clip1, clip2, clip3, log_text). Clip
        slots without a file are None. Failures are reported through
        status_message instead of being raised.
    """
    if video_file is None:
        return "Please upload a video first.", None, None, None, ""

    # The return tuple has exactly three clip slots, so never copy more.
    max_outputs = 3
    log_messages = []

    def log(msg, echo=True):
        # Keep a timestamped copy for the returned log text; mirror to the
        # module logger unless the caller already logged it another way.
        log_messages.append(f"[{time.strftime('%H:%M:%S')}] {msg}")
        if echo:
            logger.info(msg)

    def failure(message):
        # Uniform "no clips" result carrying the log collected so far.
        return message, None, None, None, "\n".join(log_messages)

    pipeline = None
    output_dir = None
    try:
        video_path = Path(video_file)
        log(f"Processing video: {video_path.name}")
        progress(0.05, desc="Validating video...")

        # Import pipeline components
        from utils.helpers import validate_video_file, validate_image_file, format_duration
        from pipeline.orchestrator import PipelineOrchestrator

        # Validate video
        validation = validate_video_file(video_file)
        if not validation.is_valid:
            return failure(f"Error: {validation.error_message}")
        log(f"Video size: {validation.file_size / (1024*1024):.1f} MB")

        # Validate reference image if provided
        ref_path = None
        if reference_image is not None:
            ref_validation = validate_image_file(reference_image)
            if ref_validation.is_valid:
                ref_path = reference_image
                log(f"Reference image: {Path(reference_image).name}")
            else:
                log(f"Warning: Invalid reference image - {ref_validation.error_message}")

        # Map domain string to internal value
        domain_map = {
            "Sports": "sports",
            "Vlogs": "vlogs",
            "Music Videos": "music",
            "Podcasts": "podcasts",
            "Gaming": "gaming",
            "General": "general",
        }
        domain_value = domain_map.get(domain, "general")
        log(f"Domain: {domain_value}")

        # Progress callback to update UI during processing
        def on_progress(pipeline_progress):
            stage = pipeline_progress.stage.value
            pct = pipeline_progress.progress
            msg = pipeline_progress.message
            log(f"[{stage}] {msg}")
            # Map pipeline progress (0-1) to our range (0.1-0.9); clamp so an
            # out-of-range value cannot push the bar outside that window.
            bounded = min(max(float(pct), 0.0), 1.0)
            progress(0.1 + (bounded * 0.8), desc=f"{stage}: {msg}")

        # Initialize pipeline
        progress(0.1, desc="Initializing AI models...")
        log("Initializing pipeline...")
        pipeline = PipelineOrchestrator(progress_callback=on_progress)

        # Process video
        progress(0.15, desc="Starting analysis...")
        log(f"Processing: {int(num_clips)} clips @ {int(clip_duration)}s each")
        # A whitespace-only prompt carries no instructions; send None instead of "".
        prompt = (custom_prompt or "").strip() or None
        result = pipeline.process(
            video_path=video_path,
            num_clips=int(num_clips),
            clip_duration=float(clip_duration),
            domain=domain_value,
            reference_image=ref_path,
            custom_prompt=prompt,
        )
        progress(0.9, desc="Extracting clips...")

        # Handle result
        if not result.success:
            log(f"Processing failed: {result.error_message}")
            return failure(f"Error: {result.error_message}")

        log(f"Processing complete in {result.processing_time:.1f}s")

        # Create the output directory only once there is something to copy,
        # so failed runs do not leave empty temp directories behind.
        output_dir = Path(tempfile.mkdtemp(prefix="shortsmith_output_"))
        log(f"Output directory: {output_dir}")

        clip_paths = []
        for i, clip in enumerate(result.clips):
            if len(clip_paths) >= max_outputs:
                break
            source = Path(clip.clip_path)
            if not source.exists():
                continue
            output_path = output_dir / f"highlight_{i+1}.mp4"
            shutil.copy2(source, output_path)
            clip_paths.append(str(output_path))
            log(f"Clip {i+1}: {format_duration(clip.start_time)} - {format_duration(clip.end_time)} (score: {clip.hype_score:.2f})")

        if not clip_paths:
            # Nothing was copied: drop the empty directory and say so plainly.
            shutil.rmtree(output_dir, ignore_errors=True)
            output_dir = None
            log("No clip files were produced")
            return failure("Processing finished, but no highlight clips were produced.")

        status = f"Successfully extracted {len(clip_paths)} highlight clips!\nProcessing time: {result.processing_time:.1f}s"
        progress(1.0, desc="Done!")

        # Return up to 3 clips, padding unused slots with None
        padded = clip_paths + [None] * (max_outputs - len(clip_paths))
        return (status, *padded, "\n".join(log_messages))
    except Exception as e:
        error_msg = f"Unexpected error: {str(e)}"
        log(error_msg)
        # logger.exception records the traceback once; the returned log text
        # gets its own copy without echoing it to the logger a second time.
        logger.exception("Pipeline error")
        log(traceback.format_exc(), echo=False)
        if output_dir is not None:
            # Remove partially copied clips that will never be returned.
            shutil.rmtree(output_dir, ignore_errors=True)
        return failure(error_msg)
    finally:
        # Release pipeline resources on every exit path, including exceptions.
        if pipeline is not None:
            try:
                pipeline.cleanup()
            except Exception:
                # A cleanup failure must not replace the result being returned.
                logger.exception("Pipeline cleanup failed")
# Build Gradio interface
# Long literals are kept in private constants so the layout code below stays readable.
_APP_CSS = """
.container { max-width: 1200px; margin: auto; }
.output-video { min-height: 200px; }
"""

_HEADER_MD = """
# 🎬 ShortSmith v2
### AI-Powered Video Highlight Extractor
Upload a video and automatically extract the most engaging highlight clips using AI analysis.
"""

_FOOTER_MD = """
---
**ShortSmith v2** | Powered by Qwen2-VL, InsightFace, and Librosa |
[GitHub](https://github.com) | Built with Gradio
"""

_DOMAIN_CHOICES = ["Sports", "Vlogs", "Music Videos", "Podcasts", "Gaming", "General"]

with gr.Blocks(title="ShortSmith v2", theme=gr.themes.Soft(), css=_APP_CSS) as demo:
    gr.Markdown(_HEADER_MD)

    with gr.Row():
        # Left column - Inputs
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Input")
            video_input = gr.Video(label="Upload Video", sources=["upload"])

            with gr.Accordion("⚙️ Settings", open=True):
                domain_dropdown = gr.Dropdown(
                    label="Content Domain",
                    choices=_DOMAIN_CHOICES,
                    value="General",
                    info="Select the type of content for optimized scoring",
                )
                with gr.Row():
                    num_clips_slider = gr.Slider(
                        label="Number of Clips",
                        info="How many highlight clips to extract",
                        minimum=1, maximum=3, step=1, value=3,
                    )
                    duration_slider = gr.Slider(
                        label="Clip Duration (seconds)",
                        info="Target duration for each clip",
                        minimum=5, maximum=30, step=1, value=15,
                    )

            with gr.Accordion("👤 Person Filtering (Optional)", open=False):
                reference_image = gr.Image(
                    label="Reference Image", type="filepath", sources=["upload"]
                )
                gr.Markdown("*Upload a photo of a person to prioritize clips featuring them.*")

            with gr.Accordion("📝 Custom Instructions (Optional)", open=False):
                custom_prompt = gr.Textbox(
                    label="Additional Instructions",
                    lines=2,
                    placeholder="E.g., 'Focus on crowd reactions' or 'Prioritize action scenes'",
                )

            process_btn = gr.Button("🚀 Extract Highlights", variant="primary", size="lg")

        # Right column - Outputs
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Output")
            status_output = gr.Textbox(label="Status", lines=2, interactive=False)

            gr.Markdown("#### Extracted Clips")
            # Three identical players, created in order: Clip 1, Clip 2, Clip 3.
            clip1_output, clip2_output, clip3_output = [
                gr.Video(label=f"Clip {slot}", elem_classes=["output-video"])
                for slot in (1, 2, 3)
            ]

            with gr.Accordion("📋 Processing Log", open=True):
                log_output = gr.Textbox(
                    label="Log", lines=10, interactive=False, show_copy_button=True
                )

    gr.Markdown(_FOOTER_MD)

    # Connect the button to the processing function.
    # Input order follows process_video's parameters; output order follows its return tuple.
    _click_inputs = [
        video_input,
        domain_dropdown,
        num_clips_slider,
        duration_slider,
        reference_image,
        custom_prompt,
    ]
    _click_outputs = [
        status_output,
        clip1_output,
        clip2_output,
        clip3_output,
        log_output,
    ]
    process_btn.click(
        fn=process_video,
        inputs=_click_inputs,
        outputs=_click_outputs,
        show_progress="full",
    )
# Launch the app
# Run as a script: explicit host/port and show_error enabled.
# Imported as a module (the original notes this path is for HuggingFace Spaces):
# launch with Gradio's defaults.
_launch_options = (
    {"server_name": "0.0.0.0", "server_port": 7860, "show_error": True}
    if __name__ == "__main__"
    else {}
)
demo.queue()
demo.launch(**_launch_options)