"""Caption engine service.

Renders a transcript as an ASS subtitle track burned onto a fully
transparent WebM (VP9 + alpha) layer, uploads the result to Cloudinary and
exposes job submission / polling endpoints plus a few debug endpoints.
"""

import concurrent.futures
import json
import os
import shlex
import shutil
import subprocess
import time
import traceback
import uuid
from typing import Dict, List, Optional

from fastapi import FastAPI, BackgroundTasks, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import cloudinary
import cloudinary.uploader

from ass_generator import generate_ass

# ------------------------------------------
# CONFIGURATION
# ------------------------------------------
CLOUD_NAME = "dgfhhszx8"
UPLOAD_PRESET = "testing"

# In-memory job registry keyed by job id.
# NOTE(review): entries are never evicted and are lost on restart; fine for a
# single-process prototype, confirm before relying on it in production.
JOBS: Dict[str, dict] = {}

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive CORS policy -- confirm this is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class TranscriptItem(BaseModel):
    """One caption: its text and its start/end times."""

    text: str
    start: float
    end: float


class VideoRequest(BaseModel):
    """Payload accepted by ``POST /jobs``."""

    video_url: str
    transcript: Optional[List[TranscriptItem]] = None
    style: Optional[str] = "hormozi"


# ------------------------------------------
# BACKGROUND WORKER
# ------------------------------------------
def _caption_duration(transcript: Optional[List[TranscriptItem]]) -> float:
    """Return the overlay length: latest caption end plus a 1s buffer.

    Falls back to 5.0 when there is no transcript. The maximum ``end`` is
    used (rather than the last item's) so unordered transcripts are not
    truncated.
    """
    if not transcript:
        return 5.0
    return max(float(item.end) for item in transcript) + 1.0


def _build_overlay_command(ass_file: str, output_video: str,
                           duration: float) -> List[str]:
    """Build the FFmpeg argument list that renders the transparent overlay."""
    return [
        "ffmpeg", "-y",
        # Fully transparent 1280x720 @ 30fps RGBA canvas.
        "-f", "lavfi",
        "-i", f"color=c=0x000000@0:s=1280x720:d={duration}:r=30,format=rgba",
        # The quotes here are filtergraph-level quoting, not shell quoting;
        # the path is built from a UUID so it contains no quote characters.
        "-vf", f"ass='{ass_file}'",
        # yuva420p keeps the alpha channel; alt-ref frames must be disabled
        # for VP9 alpha encoding.
        "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p", "-auto-alt-ref", "0",
        "-b:v", "1M", "-deadline", "realtime", "-cpu-used", "8",
        output_video,
    ]


def _render_overlay(job_id: str, ass_file: str, output_video: str,
                    duration: float) -> None:
    """Run FFmpeg and verify that it produced a plausible output file.

    Raises:
        RuntimeError: if FFmpeg exits non-zero, or the output file is
            missing or smaller than 1000 bytes.
    """
    cmd = _build_overlay_command(ass_file, output_video, duration)
    printable = " ".join(shlex.quote(arg) for arg in cmd)
    print(f"[{job_id}] Running FFmpeg command:\n{printable}")

    # Argument list + no shell: nothing in the command is shell-interpreted.
    result = subprocess.run(cmd, capture_output=True, text=True)

    print(f"[{job_id}] FFmpeg STDOUT: {result.stdout}")
    print(f"[{job_id}] FFmpeg STDERR: {result.stderr}")

    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg failed: {result.stderr}")

    if not os.path.exists(output_video):
        raise RuntimeError("Output video file was not created")

    file_size = os.path.getsize(output_video)
    print(f"[{job_id}] Output video size: {file_size} bytes")

    if file_size < 1000:
        raise RuntimeError(
            f"Output video too small ({file_size} bytes), likely empty"
        )


def process_video_background(job_id: str, req: VideoRequest):
    """Render, upload and record the caption overlay for one job.

    Progress and the outcome are written into ``JOBS[job_id]``: on success
    ``status`` becomes ``"completed"`` and ``result`` holds
    ``[video_url, ass_url]``; on any failure ``status`` becomes ``"failed"``
    and ``error`` holds the exception message. The per-job work directory is
    always removed, whether the job succeeded or not.
    """
    print(f"[{job_id}] Starting Job")
    JOBS[job_id]["status"] = "processing"

    work_dir = f"/tmp/{job_id}"
    ass_file = os.path.join(work_dir, "captions.ass")
    output_video = os.path.join(work_dir, "output.webm")  # WebM for transparency

    try:
        # Inside the try so a filesystem error marks the job as failed
        # instead of leaving it stuck in "processing".
        os.makedirs(work_dir, exist_ok=True)

        # 1. CALCULATE DURATION
        duration = _caption_duration(req.transcript)
        JOBS[job_id]["progress"] = f"Generating {duration}s Transparent Layer..."

        if not req.transcript:
            raise ValueError("No transcript provided")

        # 2. GENERATE CAPTIONS
        transcript_dicts = [t.dict() for t in req.transcript]
        generate_ass(transcript_dicts, style_name=req.style, output_file=ass_file)

        # DEBUG: print the ASS file contents. errors="replace" so a decoding
        # problem in this purely diagnostic read can never fail the job.
        with open(ass_file, "r", encoding="utf-8", errors="replace") as f:
            ass_content = f.read()
        print(f"[{job_id}] ASS File Content:\n{ass_content}")

        # 3. GENERATE TRANSPARENT WEBM
        _render_overlay(job_id, ass_file, output_video, duration)

        # 4. UPLOAD
        JOBS[job_id]["progress"] = "Uploading Transparent Layer..."

        res_vid = cloudinary.uploader.unsigned_upload(
            output_video,
            UPLOAD_PRESET,
            cloud_name=CLOUD_NAME,
            resource_type="video"
        )

        # DEBUG: upload the ASS file too
        res_ass = cloudinary.uploader.unsigned_upload(
            ass_file,
            UPLOAD_PRESET,
            cloud_name=CLOUD_NAME,
            resource_type="raw"
        )

        url_vid = res_vid['secure_url']
        url_ass = res_ass['secure_url']

        JOBS[job_id]["status"] = "completed"
        JOBS[job_id]["progress"] = "Done"
        JOBS[job_id]["result"] = [url_vid, url_ass]

    except Exception as e:
        # Top-level boundary of the background task: record the failure on
        # the job so the polling endpoint can report it.
        error_details = traceback.format_exc()
        print(f"[{job_id}] FAILED: {error_details}")
        JOBS[job_id]["status"] = "failed"
        JOBS[job_id]["error"] = str(e)

    finally:
        # 5. CLEANUP -- best effort, and also on failure so /tmp does not
        # accumulate directories from failed jobs.
        shutil.rmtree(work_dir, ignore_errors=True)


# ------------------------------------------
# API ENDPOINTS
# ------------------------------------------
@app.post("/jobs")
def submit_job(req: VideoRequest, background_tasks: BackgroundTasks):
    """Register a new job, schedule it in the background, return its id."""
    job_id = str(uuid.uuid4())
    JOBS[job_id] = {
        "status": "queued",
        "progress": "Waiting...",
        "result": None,
        "created_at": time.time()
    }
    background_tasks.add_task(process_video_background, job_id, req)
    return {"job_id": job_id, "status": "queued"}


@app.get("/jobs/{job_id}")
def get_job_status(job_id: str):
    """Return the stored job record, or respond 404 for an unknown id."""
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(status_code=404)
    return job


@app.get("/")
def home():
    """Liveness endpoint."""
    return {"message": "Caption Engine V3 Running"}


@app.get("/debug/test")
def debug_test():
    """
    Debug endpoint that generates a simple test video with text
    to verify FFmpeg is working correctly with transparency.

    Returns the command, its output and exit code, whether the output file
    exists and its size, and the Cloudinary URL when the render succeeded.
    On an exception the response contains ``error`` and ``traceback``.
    """
    work_dir = "/tmp/debug_test"
    output_path = os.path.join(work_dir, "test.webm")

    try:
        os.makedirs(work_dir, exist_ok=True)

        # Simple test: generate a transparent video with drawtext.
        # The command is a constant (no request data), so running it through
        # the shell is safe here.
        cmd = (
            f"ffmpeg -y "
            f"-f lavfi -i 'color=black@0:s=1280x720:d=3,format=rgba' "
            f"-vf \"drawtext=text='HELLO WORLD':fontsize=60:fontcolor=yellow:x=(w-text_w)/2:y=(h-text_h)/2\" "
            f"-c:v libvpx-vp9 -pix_fmt yuva420p -auto-alt-ref 0 "
            f"-b:v 1M "
            f"'{output_path}'"
        )

        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)

        file_exists = os.path.exists(output_path)
        response = {
            "command": cmd,
            "stdout": result.stdout,
            "stderr": result.stderr,
            "return_code": result.returncode,
            "file_exists": file_exists,
            "file_size": os.path.getsize(output_path) if file_exists else 0
        }

        # Upload to cloudinary if successful
        if result.returncode == 0 and file_exists:
            res = cloudinary.uploader.unsigned_upload(
                output_path,
                UPLOAD_PRESET,
                cloud_name=CLOUD_NAME,
                resource_type="video"
            )
            response["cloudinary_url"] = res.get("secure_url")

        return response

    except Exception as e:
        return {"error": str(e), "traceback": traceback.format_exc()}

    finally:
        # Cleanup on every path, including a failed upload.
        shutil.rmtree(work_dir, ignore_errors=True)


@app.get("/debug/fonts")
def debug_fonts():
    """Check available fonts on the system"""
    result = subprocess.run(
        "fc-list : family | sort | uniq",
        shell=True,
        capture_output=True,
        text=True
    )
    # splitlines() yields [] for empty output, whereas split("\n") would
    # report a single empty "font".
    fonts = result.stdout.strip().splitlines()
    return {"fonts": fonts[:50], "total": len(fonts)}  # Limit output


@app.get("/debug/ffmpeg")
def debug_ffmpeg():
    """Check FFmpeg version and capabilities"""
    version = subprocess.run(
        "ffmpeg -version | head -5",
        shell=True, capture_output=True, text=True
    )
    encoders = subprocess.run(
        "ffmpeg -encoders 2>/dev/null | grep vp9",
        shell=True, capture_output=True, text=True
    )
    filters = subprocess.run(
        "ffmpeg -filters 2>/dev/null | grep -E '(ass|subtitles)'",
        shell=True, capture_output=True, text=True
    )

    return {
        "version": version.stdout,
        "vp9_encoders": encoders.stdout,
        "subtitle_filters": filters.stdout
    }