Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
from fastapi import FastAPI, HTTPException
|
|
|
|
| 2 |
import subprocess
|
| 3 |
import base64
|
| 4 |
import os
|
|
@@ -8,17 +9,20 @@ import whisper
|
|
| 8 |
|
| 9 |
app = FastAPI()
|
| 10 |
|
| 11 |
-
# Load Whisper Tiny
|
| 12 |
print("Loading Whisper...")
|
| 13 |
whisper_model = whisper.load_model("tiny")
|
| 14 |
|
|
|
|
|
|
|
|
|
|
| 15 |
def file_to_base64(filepath):
    """Return the contents of *filepath* as a Base64-encoded string.

    Args:
        filepath: Path of the file to read; it is opened in binary mode.

    Returns:
        The Base64 text (decoded as UTF-8) of the file's bytes, or
        ``None`` when the file does not exist.
    """
    if not os.path.exists(filepath):
        return None
    try:
        with open(filepath, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")
    except FileNotFoundError:
        # The file vanished between the existence check and the open;
        # report it the same way as a file that was never there.
        return None
|
| 19 |
|
| 20 |
@app.post("/process-video")
|
| 21 |
-
async def process_video(
|
| 22 |
job_id = str(uuid.uuid4())
|
| 23 |
work_dir = f"/tmp/viralcat_{job_id}"
|
| 24 |
os.makedirs(work_dir, exist_ok=True)
|
|
@@ -26,10 +30,11 @@ async def process_video(file: UploadFile = File(...)):
|
|
| 26 |
audio_path = os.path.join(work_dir, "audio.wav")
|
| 27 |
|
| 28 |
try:
|
| 29 |
-
|
| 30 |
-
|
|
|
|
| 31 |
|
| 32 |
-
#
|
| 33 |
probe = subprocess.run([
|
| 34 |
"ffprobe", "-v", "error", "-show_entries",
|
| 35 |
"format=duration", "-of", "default=noprint_wrappers=1:nokey=1",
|
|
@@ -37,8 +42,7 @@ async def process_video(file: UploadFile = File(...)):
|
|
| 37 |
], capture_output=True, text=True, check=True)
|
| 38 |
duration = float(probe.stdout.strip() or 0)
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
# Calculate fps to get 15 frames (e.g. if 30s, fps is 15/30 = 0.5)
|
| 42 |
fps = 15 / max(duration, 1)
|
| 43 |
subprocess.run([
|
| 44 |
"ffmpeg", "-y", "-i", video_path,
|
|
@@ -47,7 +51,7 @@ async def process_video(file: UploadFile = File(...)):
|
|
| 47 |
"-q:v", "4", f"{work_dir}/frame_%03d.jpg"
|
| 48 |
], check=True, capture_output=True)
|
| 49 |
|
| 50 |
-
#
|
| 51 |
subprocess.run([
|
| 52 |
"ffmpeg", "-y", "-i", video_path,
|
| 53 |
"-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path
|
|
@@ -58,7 +62,7 @@ async def process_video(file: UploadFile = File(...)):
|
|
| 58 |
result = whisper_model.transcribe(audio_path)
|
| 59 |
transcript = result["text"].strip()
|
| 60 |
|
| 61 |
-
#
|
| 62 |
frame_files = sorted([f for f in os.listdir(work_dir) if f.startswith("frame_")])
|
| 63 |
frames_b64 = [file_to_base64(os.path.join(work_dir, f)) for f in frame_files]
|
| 64 |
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
import subprocess
|
| 4 |
import base64
|
| 5 |
import os
|
|
|
|
| 9 |
|
| 10 |
app = FastAPI()
|
| 11 |
|
| 12 |
+
# Load Whisper Tiny once on startup
|
| 13 |
print("Loading Whisper...")
|
| 14 |
whisper_model = whisper.load_model("tiny")
|
| 15 |
|
| 16 |
+
class VideoJsonRequest(BaseModel):
    """JSON request body accepted by the ``/process-video`` endpoint."""

    # Base64-encoded bytes of the video; the handler decodes this string
    # and writes the result to disk before probing and extracting from it.
    video_base64: str
|
| 18 |
+
|
| 19 |
def file_to_base64(filepath):
    """Return the contents of *filepath* as a Base64-encoded string.

    Args:
        filepath: Path of the file to read; it is opened in binary mode.

    Returns:
        The Base64 text (decoded as UTF-8) of the file's bytes, or
        ``None`` when the file does not exist.
    """
    if not os.path.exists(filepath):
        return None
    try:
        with open(filepath, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")
    except FileNotFoundError:
        # The file vanished between the existence check and the open;
        # report it the same way as a file that was never there.
        return None
|
| 23 |
|
| 24 |
@app.post("/process-video")
|
| 25 |
+
async def process_video(req: VideoJsonRequest):
|
| 26 |
job_id = str(uuid.uuid4())
|
| 27 |
work_dir = f"/tmp/viralcat_{job_id}"
|
| 28 |
os.makedirs(work_dir, exist_ok=True)
|
|
|
|
| 30 |
audio_path = os.path.join(work_dir, "audio.wav")
|
| 31 |
|
| 32 |
try:
|
| 33 |
+
# 1. Decode Base64 string into actual MP4 file
|
| 34 |
+
with open(video_path, "wb") as f:
|
| 35 |
+
f.write(base64.b64decode(req.video_base64))
|
| 36 |
|
| 37 |
+
# 2. Get Duration
|
| 38 |
probe = subprocess.run([
|
| 39 |
"ffprobe", "-v", "error", "-show_entries",
|
| 40 |
"format=duration", "-of", "default=noprint_wrappers=1:nokey=1",
|
|
|
|
| 42 |
], capture_output=True, text=True, check=True)
|
| 43 |
duration = float(probe.stdout.strip() or 0)
|
| 44 |
|
| 45 |
+
# 3. Extract exactly 15 frames
|
|
|
|
| 46 |
fps = 15 / max(duration, 1)
|
| 47 |
subprocess.run([
|
| 48 |
"ffmpeg", "-y", "-i", video_path,
|
|
|
|
| 51 |
"-q:v", "4", f"{work_dir}/frame_%03d.jpg"
|
| 52 |
], check=True, capture_output=True)
|
| 53 |
|
| 54 |
+
# 4. Extract & Transcribe Audio
|
| 55 |
subprocess.run([
|
| 56 |
"ffmpeg", "-y", "-i", video_path,
|
| 57 |
"-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path
|
|
|
|
| 62 |
result = whisper_model.transcribe(audio_path)
|
| 63 |
transcript = result["text"].strip()
|
| 64 |
|
| 65 |
+
# 5. Gather frames
|
| 66 |
frame_files = sorted([f for f in os.listdir(work_dir) if f.startswith("frame_")])
|
| 67 |
frames_b64 = [file_to_base64(os.path.join(work_dir, f)) for f in frame_files]
|
| 68 |
|