ffmpeg_cats

Running

App Files Files Community

Pepguy committed 20 days ago

Commit

057ca82

verified ·

1 Parent(s): 2d9fd0c

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -10

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from fastapi import FastAPI, HTTPException, UploadFile, File, Form
 import subprocess
 import base64
 import os
@@ -8,17 +9,20 @@ import whisper
 app = FastAPI()
-# Load Whisper Tiny (Fastest) once on startup
 print("Loading Whisper...")
 whisper_model = whisper.load_model("tiny")
 def file_to_base64(filepath):
     """Return the contents of the file at *filepath* as a base64 string.

     The file is read in binary mode, base64-encoded, and the encoded bytes
     are decoded to ``str`` as UTF-8. Returns ``None`` when *filepath* does
     not exist.
     """
     # Missing file is reported as None rather than raising.
     if not os.path.exists(filepath): return None
     with open(filepath, "rb") as f:
         return base64.b64encode(f.read()).decode('utf-8')
 @app.post("/process-video")
-async def process_video(file: UploadFile = File(...)):
     job_id = str(uuid.uuid4())
     work_dir = f"/tmp/viralcat_{job_id}"
     os.makedirs(work_dir, exist_ok=True)
@@ -26,10 +30,11 @@ async def process_video(file: UploadFile = File(...)):
     audio_path = os.path.join(work_dir, "audio.wav")
     try:
-        with open(video_path, "wb") as buffer:
-            shutil.copyfileobj(file.file, buffer)
-        # 1. Get Duration
         probe = subprocess.run([
             "ffprobe", "-v", "error", "-show_entries",
             "format=duration", "-of", "default=noprint_wrappers=1:nokey=1",
@@ -37,8 +42,7 @@ async def process_video(file: UploadFile = File(...)):
         ], capture_output=True, text=True, check=True)
         duration = float(probe.stdout.strip() or 0)
-        # 2. Extract exactly 15 frames regardless of length
-        # Calculate fps to get 15 frames (e.g. if 30s, fps is 15/30 = 0.5)
         fps = 15 / max(duration, 1)
         subprocess.run([
             "ffmpeg", "-y", "-i", video_path,
@@ -47,7 +51,7 @@ async def process_video(file: UploadFile = File(...)):
             "-q:v", "4", f"{work_dir}/frame_%03d.jpg"
         ], check=True, capture_output=True)
-        # 3. Extract & Transcribe Audio
         subprocess.run([
             "ffmpeg", "-y", "-i", video_path,
             "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path
@@ -58,7 +62,7 @@ async def process_video(file: UploadFile = File(...)):
             result = whisper_model.transcribe(audio_path)
             transcript = result["text"].strip()
-        # 4. Gather frames
         frame_files = sorted([f for f in os.listdir(work_dir) if f.startswith("frame_")])
         frames_b64 = [file_to_base64(os.path.join(work_dir, f)) for f in frame_files]

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
 import subprocess
 import base64
 import os
 app = FastAPI()
+# Load Whisper Tiny once on startup
 print("Loading Whisper...")
 whisper_model = whisper.load_model("tiny")
+class VideoJsonRequest(BaseModel):
+    video_base64: str
 def file_to_base64(filepath):
     """Return the contents of the file at *filepath* as a base64 string.

     The file is read in binary mode, base64-encoded, and the encoded bytes
     are decoded to ``str`` as UTF-8. Returns ``None`` when *filepath* does
     not exist.
     """
     # Missing file is reported as None rather than raising.
     if not os.path.exists(filepath): return None
     with open(filepath, "rb") as f:
         return base64.b64encode(f.read()).decode('utf-8')
 @app.post("/process-video")
+async def process_video(req: VideoJsonRequest):
     job_id = str(uuid.uuid4())
     work_dir = f"/tmp/viralcat_{job_id}"
     os.makedirs(work_dir, exist_ok=True)
     audio_path = os.path.join(work_dir, "audio.wav")
     try:
+        # 1. Decode Base64 string into actual MP4 file
+        with open(video_path, "wb") as f:
+            f.write(base64.b64decode(req.video_base64))
+        # 2. Get Duration
         probe = subprocess.run([
             "ffprobe", "-v", "error", "-show_entries",
             "format=duration", "-of", "default=noprint_wrappers=1:nokey=1",
         ], capture_output=True, text=True, check=True)
         duration = float(probe.stdout.strip() or 0)
+        # 3. Extract exactly 15 frames
         fps = 15 / max(duration, 1)
         subprocess.run([
             "ffmpeg", "-y", "-i", video_path,
             "-q:v", "4", f"{work_dir}/frame_%03d.jpg"
         ], check=True, capture_output=True)
+        # 4. Extract & Transcribe Audio
         subprocess.run([
             "ffmpeg", "-y", "-i", video_path,
             "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path
             result = whisper_model.transcribe(audio_path)
             transcript = result["text"].strip()
+        # 5. Gather frames
         frame_files = sorted([f for f in os.listdir(work_dir) if f.startswith("frame_")])
         frames_b64 = [file_to_base64(os.path.join(work_dir, f)) for f in frame_files]