Spaces:
Sleeping
Sleeping
File size: 7,264 Bytes
91d209c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
"""
Frame Extraction API endpoints
Intelligent frame selection using Whisper
"""
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import List, Optional
import tempfile
import os
from utils.whisper_trim import (
extract_post_speech_frames,
find_last_word_timestamp,
trim_video_to_last_word,
is_whisper_available
)
# Router on which this module's endpoints (/extract-frames, /trim-video,
# /whisper-status) are registered via the @router.post / @router.get decorators.
router = APIRouter()
class FrameExtractionRequest(BaseModel):
    """
    Request body accepted by POST /extract-frames.
    """
    # URL the endpoint downloads the video from before processing it.
    video_url: str
    # Script text passed, together with the downloaded video, to
    # extract_post_speech_frames and find_last_word_timestamp.
    script: str
    # Forwarded as ``buffer_time`` to extract_post_speech_frames.
    # NOTE(review): presumably seconds after the last spoken word -- confirm
    # against utils.whisper_trim.
    buffer_time: Optional[float] = 0.3
    # Forwarded as ``num_frames`` to extract_post_speech_frames.
    num_frames: Optional[int] = 3
    # Forwarded as ``model_size`` to both Whisper helpers.
    model_size: Optional[str] = "base"
class FrameExtractionResponse(BaseModel):
    """
    Response body returned by POST /extract-frames.
    """
    # One dict per extracted frame, with the keys "timestamp",
    # "frame_data_url" and "label".
    frames: List[dict]
    # Value returned by find_last_word_timestamp, or the latest fallback
    # timestamp when the Whisper-based extraction fails.
    last_word_time: float
    # Duration of the downloaded video as reported by moviepy's VideoFileClip.
    total_duration: float
def _video_duration(video_path):
    """Return the duration moviepy reports for *video_path*, always closing the clip."""
    from moviepy.editor import VideoFileClip

    clip = VideoFileClip(video_path)
    try:
        return clip.duration
    finally:
        # Close even when reading the duration raises, so the reader
        # process/file handle is not leaked.
        clip.close()


def _fallback_end_frames(video_path):
    """Extract three frames at fixed offsets (1.5, 1.0, 0.5) before the end of the video.

    Returns a ``(frames, last_timestamp)`` tuple where ``frames`` is a list of
    ``(timestamp, frame_data, label)`` tuples and ``last_timestamp`` is the
    timestamp of the final fallback frame.
    """
    from utils.video_processor import extract_frame

    duration = _video_duration(video_path)
    offsets_and_labels = (
        (1.5, "Early End"),
        (1.0, "Mid End"),
        (0.5, "Final Frame"),
    )
    frames = []
    for offset, label in offsets_and_labels:
        # Clamp at 0 so very short videos do not yield negative timestamps.
        ts = max(0, duration - offset)
        frames.append((ts, extract_frame(video_path, ts, return_base64=True), label))
    return frames, frames[-1][0]


@router.post("/extract-frames", response_model=FrameExtractionResponse)
async def extract_frames_api(request: FrameExtractionRequest):
    """
    Extract transition frames using Whisper to detect last spoken word.

    Downloads ``request.video_url`` into a private temporary directory, asks
    ``extract_post_speech_frames`` / ``find_last_word_timestamp`` for the
    frames and the last-word timestamp, and falls back to three fixed
    timestamps near the end of the video when the Whisper path fails.

    Returns:
        FrameExtractionResponse with the frames, the last-word timestamp and
        the total video duration.

    Raises:
        HTTPException(501): Whisper is not available.
        HTTPException(400): the video download did not answer with HTTP 200.
        HTTPException(500): both the Whisper path and the fallback failed, or
            any other unexpected error occurred.
    """
    if not is_whisper_available():
        raise HTTPException(
            status_code=501,
            detail="Whisper not installed. Install with: pip install openai-whisper moviepy"
        )

    try:
        import httpx

        # NOTE(review): video_url is caller-supplied and fetched server-side;
        # consider restricting allowed hosts/schemes if this API is exposed
        # to untrusted clients.
        async with httpx.AsyncClient() as client:
            response = await client.get(request.video_url)
        if response.status_code != 200:
            raise HTTPException(
                status_code=400,
                detail=f"Failed to download video: {response.status_code}"
            )

        # A private temporary directory replaces the deprecated, race-prone
        # tempfile.mktemp() and is removed on every exit path.
        with tempfile.TemporaryDirectory() as workdir:
            temp_video = os.path.join(workdir, 'video.mp4')
            with open(temp_video, 'wb') as f:
                f.write(response.content)

            # NOTE(review): the calls below are synchronous and run inside
            # this coroutine, so they block the event loop while they work.
            try:
                # Prefer Whisper-based post-speech detection
                frames = extract_post_speech_frames(
                    temp_video,
                    request.script,
                    buffer_time=request.buffer_time,
                    num_frames=request.num_frames,
                    model_size=request.model_size
                )
                last_word_time = find_last_word_timestamp(
                    temp_video,
                    request.script,
                    model_size=request.model_size
                )
            except Exception as whisper_err:
                # Fallback: simple fixed timestamps near the end of the video.
                # The fallback result replaces any partial Whisper result so
                # frames and last_word_time always come from the same source.
                print(f"⚠️ Whisper-based frame extraction failed: {whisper_err}")
                try:
                    frames, last_word_time = _fallback_end_frames(temp_video)
                    print("✅ Returned fallback frames near video end.")
                except Exception as fallback_err:
                    print(f"❌ Fallback frame extraction failed: {fallback_err}")
                    raise HTTPException(
                        status_code=500,
                        detail=f"Frame extraction failed: {str(whisper_err)}"
                    ) from fallback_err

            duration = _video_duration(temp_video)

        # Format response
        frames_data = [
            {
                "timestamp": timestamp,
                "frame_data_url": frame_data,
                "label": label
            }
            for timestamp, frame_data, label in frames
        ]
        return FrameExtractionResponse(
            frames=frames_data,
            last_word_time=last_word_time,
            total_duration=duration
        )
    except HTTPException:
        # Keep deliberate status codes (e.g. the 400 for a failed download)
        # instead of re-wrapping them as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Frame extraction failed: {str(e)}"
        ) from e
@router.post("/trim-video")
async def trim_video_api(
    video_url: str = Form(...),
    script: str = Form(...),
    padding: float = Form(0.5),
    model_size: str = Form("base")
):
    """
    Trim video to end after last spoken word.

    Downloads ``video_url`` into a private temporary directory, hands it to
    ``trim_video_to_last_word`` together with ``script``, ``padding`` and
    ``model_size``, and returns the bytes of the file that helper reports as
    an ``video/mp4`` attachment named ``trimmed_video.mp4``.

    Raises:
        HTTPException(501): Whisper is not available.
        HTTPException(400): the video download did not answer with HTTP 200.
        HTTPException(500): any other failure while trimming.
    """
    if not is_whisper_available():
        raise HTTPException(
            status_code=501,
            detail="Whisper not installed. Install with: pip install openai-whisper moviepy"
        )

    try:
        import httpx
        from fastapi.responses import Response

        # NOTE(review): video_url is caller-supplied and fetched server-side;
        # consider restricting allowed hosts/schemes if this API is exposed
        # to untrusted clients.
        async with httpx.AsyncClient() as client:
            response = await client.get(video_url)
        if response.status_code != 200:
            raise HTTPException(
                status_code=400,
                detail=f"Failed to download video: {response.status_code}"
            )

        # A private temporary directory replaces the deprecated, race-prone
        # tempfile.mktemp() calls. Both files are removed on every exit path,
        # and cleanup no longer assumes the helper wrote exactly output_video.
        with tempfile.TemporaryDirectory() as workdir:
            temp_video = os.path.join(workdir, 'video.mp4')
            output_video = os.path.join(workdir, 'video_trimmed.mp4')

            with open(temp_video, 'wb') as f:
                f.write(response.content)

            # NOTE(review): synchronous call inside a coroutine; it blocks
            # the event loop for as long as trimming takes.
            output_path = trim_video_to_last_word(
                temp_video,
                script,
                output_video,
                padding=padding,
                model_size=model_size
            )

            # Read the result before the directory is cleaned up.
            with open(output_path, 'rb') as f:
                video_data = f.read()

        return Response(
            content=video_data,
            media_type="video/mp4",
            headers={
                "Content-Disposition": "attachment; filename=trimmed_video.mp4"
            }
        )
    except HTTPException:
        # Keep deliberate status codes (e.g. the 400 for a failed download)
        # instead of re-wrapping them as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Video trimming failed: {str(e)}"
        ) from e
@router.get("/whisper-status")
async def whisper_status():
    """
    Check if Whisper is available.

    Returns a dict with ``available`` (the result of
    ``is_whisper_available()``) and a human-readable ``message`` that is
    either a confirmation or the install hint.
    """
    # Probe once so "available" and "message" can never disagree and the
    # availability check is not repeated for a single response.
    available = is_whisper_available()
    if available:
        message = "Whisper is available"
    else:
        message = "Install with: pip install openai-whisper moviepy"
    return {
        "available": available,
        "message": message
    }
|