Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3
"""
FastAPI Wrapper for Audio-Enhanced Video Highlights
Converts your SmolVLM2 + Whisper system into a web API for Android apps
"""
import os
import tempfile

# Set cache directories to writable locations for HuggingFace Spaces.
# These environment variables MUST be set before transformers/torch are
# imported (directly, or indirectly via audio_enhanced_highlights_final),
# otherwise the libraries default to caches under $HOME, which is not
# writable inside the Spaces container.
# Use /tmp which is guaranteed to be writable in containers
CACHE_DIR = os.path.join("/tmp", ".cache", "huggingface")
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(os.path.join("/tmp", ".cache", "torch"), exist_ok=True)
os.environ['HF_HOME'] = CACHE_DIR
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favour of HF_HOME; setting both is harmless belt-and-braces.
os.environ['TRANSFORMERS_CACHE'] = CACHE_DIR
os.environ['HF_DATASETS_CACHE'] = CACHE_DIR
os.environ['TORCH_HOME'] = os.path.join("/tmp", ".cache", "torch")
os.environ['XDG_CACHE_HOME'] = os.path.join("/tmp", ".cache")
os.environ['HUGGINGFACE_HUB_CACHE'] = CACHE_DIR
# Avoid tokenizer fork-parallelism warnings/deadlocks under a server.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import sys
import uuid
import json
import asyncio
from pathlib import Path
from typing import Optional
import logging

# Add src directory to path for imports
sys.path.append(str(Path(__file__).parent / "src"))

# The heavy ML pipeline lives in src/audio_enhanced_highlights_final.py;
# fail fast at startup if it cannot be imported, since every job needs it.
try:
    from audio_enhanced_highlights_final import AudioVisualAnalyzer, extract_frames_at_intervals, save_frame_at_time, create_highlights_video
except ImportError:
    print("❌ Cannot import audio_enhanced_highlights_final.py")
    sys.exit(1)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# FastAPI app
app = FastAPI(
    title="SmolVLM2 Video Highlights API",
    description="Generate intelligent video highlights using SmolVLM2 + Whisper",
    version="1.0.0"
)

# Enable CORS for Android apps
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# rejected by browsers (the CORS spec forbids the wildcard origin when
# credentials are allowed); list the real client origin(s) in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify your Android app's domain
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Request/Response models
class AnalysisRequest(BaseModel):
    """Tunable settings for a highlight-analysis run.

    NOTE(review): this model is not referenced by any endpoint in this file —
    upload_video takes the same settings as individual query parameters, and
    with DIFFERENT defaults (interval=5.0, min_score=3.0, timeout=60).
    Confirm which set is intended before reconciling.
    """
    interval: float = 20.0       # seconds between sampled segments
    min_score: float = 6.5       # minimum combined score to qualify as highlight
    max_highlights: int = 3      # cap on number of selected segments
    whisper_model: str = "base"  # Whisper model size for transcription
    timeout: int = 35            # per-segment analysis timeout, seconds
class AnalysisResponse(BaseModel):
    """Immediate reply to an upload: a handle the client polls for progress."""
    job_id: str   # UUID identifying the processing job
    status: str   # initial lifecycle state (e.g. "queued")
    message: str  # human-readable summary for display
class JobStatus(BaseModel):
    """Polling response describing a job's current lifecycle state."""
    job_id: str
    status: str  # "processing", "completed", "failed" (upload also uses "queued")
    progress: int  # 0-100
    message: str
    highlights_url: Optional[str] = None  # "/download/..." once completed
    analysis_url: Optional[str] = None    # "/download/..." once completed
# Global storage for jobs (in production, use Redis/database)
active_jobs = {}     # job_id -> mutable job dict (queued or in flight)
completed_jobs = {}  # job_id -> final job dict (success or failure)

# Create output directories with proper permissions
# NOTE(review): tempfile is re-imported here (already imported at the top)
# and stat is never used anywhere in this file; kept as-is pending cleanup.
import tempfile
import stat

# Use /tmp directory for HuggingFace Spaces compatibility (writable location)
TEMP_DIR = os.path.join("/tmp", "temp")        # uploaded videos + scratch frames
OUTPUTS_DIR = os.path.join("/tmp", "outputs")  # generated highlights + JSON reports

# Create directories with proper permissions
os.makedirs(OUTPUTS_DIR, mode=0o755, exist_ok=True)
os.makedirs(TEMP_DIR, mode=0o755, exist_ok=True)
# NOTE(review): the scraped source showed no @app.* decorators on any handler;
# without them the endpoints are never registered. Restored here.
@app.get("/")
async def root():
    """Service metadata: API name, version, and the available endpoints."""
    return {
        "message": "SmolVLM2 Video Highlights API",
        "version": "1.0.0",
        "endpoints": {
            "upload": "/upload-video",
            "status": "/job-status/{job_id}",
            # Fixed: this literal was garbled to "/download/(unknown)" by the
            # page extraction; the download handler takes a {filename} param.
            "download": "/download/{filename}"
        }
    }
@app.post("/upload-video", response_model=AnalysisResponse)
async def upload_video(
    background_tasks: BackgroundTasks,
    video: UploadFile = File(...),
    interval: float = 5.0,
    min_score: float = 3.0,
    max_highlights: int = 3,
    whisper_model: str = "base",
    timeout: int = 60,
    enable_visual: bool = True
):
    """
    Upload a video and queue it for highlight processing.

    Returns a job_id immediately; processing itself is kicked off lazily by
    the first /job-status poll (see get_job_status).

    Raises:
        HTTPException 400: unsupported file extension.
        HTTPException 500: failure while saving the upload.
    """
    # Validate file extension (cheap sanity check, not a content check).
    if not video.filename.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
        raise HTTPException(status_code=400, detail="Only video files are supported")

    # Generate unique job ID
    job_id = str(uuid.uuid4())

    try:
        # Persist the upload under a sanitized name so a hostile filename
        # cannot escape TEMP_DIR or carry shell metacharacters.
        safe_filename = "".join(c for c in video.filename if c.isalnum() or c in '._-')
        temp_video_path = os.path.join(TEMP_DIR, f"{job_id}_{safe_filename}")
        with open(temp_video_path, "wb") as f:
            content = await video.read()
            f.write(content)

        # Record the queued job. (Fixed: the original assigned active_jobs[job_id]
        # twice back-to-back; the first "processing"/"settings" dict was dead
        # code, immediately overwritten by this one.)
        active_jobs[job_id] = {
            "status": "queued",
            "progress": 0,
            "message": "Video uploaded. Processing will start shortly.",
            "params": {
                "video_path": temp_video_path,
                "interval": interval,
                "min_score": min_score,
                "max_highlights": max_highlights,
                "whisper_model": whisper_model,
                "timeout": timeout,
                "enable_visual": enable_visual
            }
        }

        # Return immediately - processing will be triggered by first status check
        return AnalysisResponse(
            job_id=job_id,
            status="queued",
            message="Video uploaded successfully. Check status to begin processing."
        )
    except Exception as e:
        logger.error(f"Upload failed: {e}")
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
@app.get("/job-status/{job_id}", response_model=JobStatus)
async def get_job_status(job_id: str):
    """
    Get the status of a processing job.

    The first poll of a "queued" job also starts its background processing
    (lazy kick-off keeps the upload endpoint fast).

    Raises:
        HTTPException 404: unknown job_id.
    """
    # Check active jobs
    if job_id in active_jobs:
        job = active_jobs[job_id]

        # If job is queued, start processing in the background.
        if job["status"] == "queued":
            params = job["params"]
            task = asyncio.create_task(
                process_video_highlights_async(
                    job_id,
                    params["video_path"],
                    params["interval"],
                    params["min_score"],
                    params["max_highlights"],
                    params["whisper_model"],
                    params["timeout"],
                    params["enable_visual"]
                )
            )
            # Fixed: keep a strong reference to the task on the job record.
            # The event loop holds only weak references to tasks, so an
            # unreferenced pending task may be garbage-collected mid-flight.
            job["task"] = task

            # Update status to processing
            job["status"] = "processing"
            job["progress"] = 5
            job["message"] = "Processing started..."

        return JobStatus(
            job_id=job_id,
            status=job["status"],
            progress=job["progress"],
            message=job["message"]
        )

    # Check completed jobs
    if job_id in completed_jobs:
        job = completed_jobs[job_id]
        return JobStatus(
            job_id=job_id,
            status=job["status"],
            progress=100,
            message=job["message"],
            highlights_url=job.get("highlights_url"),
            analysis_url=job.get("analysis_url")
        )

    raise HTTPException(status_code=404, detail="Job not found")
@app.get("/download/{filename}")
async def download_file(filename: str):
    """
    Download a generated artifact (highlights video or analysis JSON).

    Raises:
        HTTPException 400: filename resolves outside OUTPUTS_DIR.
        HTTPException 404: no such file.
    """
    # Security fix: `filename` comes straight from the URL; the original code
    # joined it into OUTPUTS_DIR unchecked, allowing "../" path traversal.
    # Resolve symlinks/".." and require the result to stay inside OUTPUTS_DIR.
    file_path = os.path.realpath(os.path.join(OUTPUTS_DIR, filename))
    if not file_path.startswith(os.path.realpath(OUTPUTS_DIR) + os.sep):
        raise HTTPException(status_code=400, detail="Invalid filename")

    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="File not found")

    return FileResponse(
        file_path,
        media_type='application/octet-stream',
        filename=filename
    )
async def process_video_highlights_async(
    job_id: str,
    video_path: str,
    interval: float,
    min_score: float,
    max_highlights: int,
    whisper_model: str,
    timeout: int,
    enable_visual: bool
):
    """
    Background task: analyze the video, pick the best-scoring segments, and
    render a highlights video plus a JSON analysis report into OUTPUTS_DIR.

    State is published through the module-level active_jobs / completed_jobs
    dicts; this coroutine never propagates exceptions to its caller — any
    failure is recorded as a "failed" completed job.

    NOTE(review): the analyzer calls below are compute-bound and only yield
    via asyncio.sleep(0), so the event loop can still stall during a segment;
    consider run_in_executor if request latency matters.
    """
    try:
        # Update status
        active_jobs[job_id]["progress"] = 10
        active_jobs[job_id]["message"] = "Initializing AI models..."

        # Initialize analyzer in visual-only mode for HuggingFace Spaces optimization
        analyzer = AudioVisualAnalyzer(
            whisper_model_size=whisper_model,
            timeout_seconds=timeout,
            enable_visual=enable_visual,
            visual_only_mode=True  # Skip audio processing to focus resources on visual analysis
        )

        active_jobs[job_id]["progress"] = 20
        active_jobs[job_id]["message"] = "Extracting video segments..."

        # Extract segments
        segments = extract_frames_at_intervals(video_path, interval)
        total_segments = len(segments)

        active_jobs[job_id]["progress"] = 30
        active_jobs[job_id]["message"] = f"Analyzing {total_segments} segments..."

        # Analyze segments one by one, reusing a single scratch frame file.
        analyzed_segments = []
        temp_frame_path = os.path.join(TEMP_DIR, f"{job_id}_frame.jpg")
        for i, segment in enumerate(segments):
            # Map segment index onto the 30-80% progress window.
            progress = 30 + int((i / total_segments) * 50)
            active_jobs[job_id]["progress"] = progress
            active_jobs[job_id]["message"] = f"Analyzing segment {i+1}/{total_segments}"

            # Save frame for visual analysis; segments whose frame cannot be
            # extracted are silently skipped.
            if save_frame_at_time(video_path, segment['start_time'], temp_frame_path):
                analysis = analyzer.analyze_segment(video_path, segment, temp_frame_path)
                analyzed_segments.append(analysis)

            # Yield control to allow other requests
            await asyncio.sleep(0)

        # Best-effort cleanup of the scratch frame. (Fixed: was a bare
        # `except:` which would also swallow KeyboardInterrupt/CancelledError;
        # narrowed to OSError.)
        try:
            os.unlink(temp_frame_path)
        except OSError:
            pass

        active_jobs[job_id]["progress"] = 85
        active_jobs[job_id]["message"] = "Selecting best highlights..."

        # Select the top-scoring segments above the threshold.
        analyzed_segments.sort(key=lambda x: x['combined_score'], reverse=True)
        selected_segments = [s for s in analyzed_segments if s['combined_score'] >= min_score]
        selected_segments = selected_segments[:max_highlights]

        if not selected_segments:
            raise Exception(f"No segments met minimum score of {min_score}")

        active_jobs[job_id]["progress"] = 90
        active_jobs[job_id]["message"] = f"Creating highlights video with {len(selected_segments)} segments..."

        # Create output filenames using absolute paths
        highlights_filename = f"{job_id}_highlights.mp4"
        analysis_filename = f"{job_id}_analysis.json"
        highlights_path = os.path.join(OUTPUTS_DIR, highlights_filename)
        analysis_path = os.path.join(OUTPUTS_DIR, analysis_filename)

        # Create highlights video
        success = create_highlights_video(video_path, selected_segments, highlights_path)
        if not success:
            raise Exception("Failed to create highlights video")

        # Save the full analysis report alongside the rendered video.
        analysis_data = {
            'job_id': job_id,
            'input_video': video_path,
            'output_video': highlights_path,
            'settings': {
                'interval': interval,
                'min_score': min_score,
                'max_highlights': max_highlights,
                'whisper_model': whisper_model,
                'timeout': timeout
            },
            'segments': analyzed_segments,
            'selected_segments': selected_segments,
            'summary': {
                'total_segments': len(analyzed_segments),
                'selected_segments': len(selected_segments),
                'processing_time': "Completed successfully"
            }
        }
        with open(analysis_path, 'w') as f:
            json.dump(analysis_data, f, indent=2)

        # Publish completion, then retire the active-job record.
        completed_jobs[job_id] = {
            "status": "completed",
            "message": f"Successfully created highlights with {len(selected_segments)} segments",
            "highlights_url": f"/download/{highlights_filename}",
            "analysis_url": f"/download/{analysis_filename}",
            "summary": analysis_data['summary']
        }
        del active_jobs[job_id]

        # Best-effort cleanup of the uploaded video (fixed bare except, as above).
        try:
            os.unlink(video_path)
        except OSError:
            pass

    except Exception as e:
        logger.error(f"Processing failed for job {job_id}: {e}")
        # Mark as failed so the client's next poll sees a terminal state.
        completed_jobs[job_id] = {
            "status": "failed",
            "message": f"Processing failed: {str(e)}",
            "highlights_url": None,
            "analysis_url": None
        }
        # Remove from active jobs
        if job_id in active_jobs:
            del active_jobs[job_id]
        # Best-effort cleanup of the uploaded video (fixed bare except, as above).
        try:
            os.unlink(video_path)
        except OSError:
            pass
if __name__ == "__main__":
    # Local/dev entry point: serve on all interfaces, port 8000. In Spaces
    # the platform typically launches the `app` object itself.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)