cap / app.py
ADXabhi's picture
Upload 7 files
14fe91b verified
import os
import subprocess
import concurrent.futures
import uuid
import time
import json
import math
import shutil
import tempfile
import traceback
from typing import List, Optional, Dict

from fastapi import FastAPI, BackgroundTasks, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import cloudinary
import cloudinary.uploader

from ass_generator import generate_ass
# ------------------------------------------
# CONFIGURATION
# ------------------------------------------
# Cloudinary cloud name and unsigned upload preset passed to every
# cloudinary.uploader.unsigned_upload() call in this file.
CLOUD_NAME = "dgfhhszx8"
UPLOAD_PRESET = "testing"

# Process-local job table: job id -> state dict. Entries are created by the
# POST /jobs handler and mutated by the background worker; nothing visible in
# this file removes them.
JOBS: Dict[str, dict] = {}

app = FastAPI()

# NOTE(review): wildcard origins are combined with allow_credentials=True.
# FastAPI's CORS documentation advises listing origins explicitly when
# credentials are allowed -- confirm this permissive setup is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class TranscriptItem(BaseModel):
    """One caption entry of a transcript: its text and its time span."""

    # Caption text; forwarded (as part of a dict) to generate_ass().
    text: str
    # Start of the caption -- presumably seconds; TODO confirm against ass_generator.
    start: float
    # End of the caption; the worker uses it to size the rendered overlay.
    end: float
class VideoRequest(BaseModel):
    """Request body accepted by POST /jobs."""

    # NOTE(review): not read by any code visible in this file.
    video_url: str
    # Caption entries; the worker fails the job when this is missing or empty.
    transcript: Optional[List[TranscriptItem]] = None
    # Style name handed to generate_ass() as ``style_name``.
    style: Optional[str] = "hormozi"
# ------------------------------------------
# BACKGROUND WORKER
# ------------------------------------------
def _overlay_duration(transcript: List[TranscriptItem]) -> float:
    """Return the overlay length: the latest caption end plus a 1s buffer.

    The latest ``end`` is used rather than the last item's, so an unsorted
    transcript cannot truncate the overlay.

    Raises:
        ValueError: if the computed duration is not a finite positive number
            (FFmpeg would otherwise be asked to render a nonsensical or
            unbounded clip).
    """
    duration = max(float(item.end) for item in transcript) + 1.0
    if not math.isfinite(duration) or duration <= 0:
        raise ValueError(f"Invalid transcript timing (computed duration {duration}s)")
    return duration


def _build_overlay_command(ass_file: str, output_video: str, duration: float) -> List[str]:
    """Build the FFmpeg argv that burns ``ass_file`` onto a transparent canvas."""
    # The ass filter argument is single-quoted inside the filtergraph; an
    # embedded quote is written as '\'' so FFmpeg receives exactly the same
    # filter string the previous shell-based command produced.
    quoted_ass = ass_file.replace("'", "'\\''")
    return [
        "ffmpeg", "-y",
        # Fully transparent 1280x720 source, 30 fps, RGBA so alpha survives.
        "-f", "lavfi",
        "-i", f"color=c=0x000000@0:s=1280x720:d={duration}:r=30,format=rgba",
        "-vf", f"ass='{quoted_ass}'",
        # Encoder settings carried over unchanged (VP9 with an alpha plane).
        "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p", "-auto-alt-ref", "0",
        "-b:v", "1M", "-deadline", "realtime", "-cpu-used", "8",
        output_video,
    ]


def _render_overlay(job_id: str, cmd: List[str], output_video: str) -> None:
    """Run FFmpeg and verify that it produced a non-trivial output file.

    Raises:
        RuntimeError: if FFmpeg exits non-zero, or the output file is missing
            or smaller than 1000 bytes.
    """
    print(f"[{job_id}] Running FFmpeg command:\n{' '.join(cmd)}")
    # An argv list (no shell) means no value can be reinterpreted by a shell.
    result = subprocess.run(cmd, capture_output=True, text=True)
    print(f"[{job_id}] FFmpeg STDOUT: {result.stdout}")
    print(f"[{job_id}] FFmpeg STDERR: {result.stderr}")
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg failed: {result.stderr}")

    if not os.path.exists(output_video):
        raise RuntimeError("Output video file was not created")
    file_size = os.path.getsize(output_video)
    print(f"[{job_id}] Output video size: {file_size} bytes")
    if file_size < 1000:
        raise RuntimeError(f"Output video too small ({file_size} bytes), likely empty")


def process_video_background(job_id: str, req: VideoRequest):
    """Render the captions of ``req`` as a transparent WebM and upload it.

    Progress and outcome are reported through ``JOBS[job_id]``:
    ``status`` becomes "processing", then "completed" (with ``result`` set to
    ``[video_url, ass_url]``) or "failed" (with ``error`` set to the exception
    text). The function never raises for failures inside the pipeline.

    Args:
        job_id: Key of an existing entry in ``JOBS``; also names the scratch
            directory ``/tmp/<job_id>``.
        req: The submitted request; ``transcript`` and ``style`` are used.
    """
    print(f"[{job_id}] Starting Job")
    JOBS[job_id]["status"] = "processing"

    work_dir = f"/tmp/{job_id}"
    ass_file = os.path.join(work_dir, "captions.ass")
    output_video = os.path.join(work_dir, "output.webm")  # WebM for transparency

    try:
        if not req.transcript:
            raise ValueError("No transcript provided")

        # Created inside the try so a filesystem error marks the job as failed
        # instead of leaving it stuck in "processing".
        os.makedirs(work_dir, exist_ok=True)

        # 1. CALCULATE DURATION
        duration = _overlay_duration(req.transcript)
        JOBS[job_id]["progress"] = f"Generating {duration}s Transparent Layer..."

        # 2. GENERATE CAPTIONS
        transcript_dicts = [t.dict() for t in req.transcript]
        generate_ass(transcript_dicts, style_name=req.style, output_file=ass_file)

        # DEBUG: print the ASS file contents. errors="replace" keeps this
        # purely diagnostic read from failing the job on undecodable bytes
        # (assumes the file is UTF-8 -- TODO confirm against ass_generator).
        with open(ass_file, "r", encoding="utf-8", errors="replace") as f:
            print(f"[{job_id}] ASS File Content:\n{f.read()}")

        # 3. GENERATE TRANSPARENT WEBM
        _render_overlay(
            job_id,
            _build_overlay_command(ass_file, output_video, duration),
            output_video,
        )

        # 4. UPLOAD
        JOBS[job_id]["progress"] = "Uploading Transparent Layer..."
        res_vid = cloudinary.uploader.unsigned_upload(
            output_video, UPLOAD_PRESET, cloud_name=CLOUD_NAME, resource_type="video"
        )
        # DEBUG: Upload ASS file too
        res_ass = cloudinary.uploader.unsigned_upload(
            ass_file, UPLOAD_PRESET, cloud_name=CLOUD_NAME, resource_type="raw"
        )

        # Publish the result before flipping the status so a poller never
        # observes "completed" with an empty result.
        JOBS[job_id]["result"] = [res_vid["secure_url"], res_ass["secure_url"]]
        JOBS[job_id]["progress"] = "Done"
        JOBS[job_id]["status"] = "completed"
    except Exception as e:
        print(f"[{job_id}] FAILED: {traceback.format_exc()}")
        JOBS[job_id]["status"] = "failed"
        JOBS[job_id]["error"] = str(e)
    finally:
        # 5. CLEANUP -- runs on success and failure alike so failed jobs do
        # not leave their scratch directory behind; best-effort by design.
        shutil.rmtree(work_dir, ignore_errors=True)
# ------------------------------------------
# API ENDPOINTS
# ------------------------------------------
@app.post("/jobs")
def submit_job(req: VideoRequest, background_tasks: BackgroundTasks):
    """Register a new job, schedule its background processing, and return its id."""
    new_id = str(uuid.uuid4())

    # Initial state as seen by pollers until the worker picks the job up.
    initial_state = dict(
        status="queued",
        progress="Waiting...",
        result=None,
        created_at=time.time(),
    )
    JOBS[new_id] = initial_state

    background_tasks.add_task(process_video_background, new_id, req)
    return {"job_id": new_id, "status": "queued"}
@app.get("/jobs/{job_id}")
def get_job_status(job_id: str):
    """Return the stored state dict for ``job_id``; respond 404 when there is none."""
    record = JOBS.get(job_id)
    if record:
        return record
    raise HTTPException(status_code=404)
@app.get("/")
def home():
    """Return a static banner message for the root path."""
    banner = "Caption Engine V3 Running"
    return {"message": banner}
@app.get("/debug/test")
def debug_test():
    """
    Debug endpoint that generates a simple test video with text
    to verify FFmpeg is working correctly with transparency.

    Returns a dict with the command, FFmpeg's stdout/stderr/return code and
    the output file's existence and size, plus ``cloudinary_url`` when the
    render succeeded and was uploaded. Any exception is reported as
    ``{"error": ..., "traceback": ...}`` instead of being raised.
    """
    work_dir = None
    try:
        # A unique scratch directory per request: concurrent calls no longer
        # share (and delete) one fixed /tmp/debug_test directory.
        work_dir = tempfile.mkdtemp(prefix="debug_test_")
        output_path = os.path.join(work_dir, "test.webm")

        # Simple test: Generate transparent video with drawtext
        cmd = [
            "ffmpeg", "-y",
            "-f", "lavfi",
            "-i", "color=black@0:s=1280x720:d=3,format=rgba",
            "-vf",
            "drawtext=text='HELLO WORLD':fontsize=60:fontcolor=yellow"
            ":x=(w-text_w)/2:y=(h-text_h)/2",
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p", "-auto-alt-ref", "0",
            "-b:v", "1M",
            output_path,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)

        file_exists = os.path.exists(output_path)
        response = {
            # Kept as a string, as before, for easy reading in the response.
            "command": " ".join(cmd),
            "stdout": result.stdout,
            "stderr": result.stderr,
            "return_code": result.returncode,
            "file_exists": file_exists,
            "file_size": os.path.getsize(output_path) if file_exists else 0,
        }

        # Upload to cloudinary if successful
        if result.returncode == 0 and file_exists:
            res = cloudinary.uploader.unsigned_upload(
                output_path, UPLOAD_PRESET, cloud_name=CLOUD_NAME, resource_type="video"
            )
            response["cloudinary_url"] = res.get("secure_url")

        return response
    except Exception as e:
        return {"error": str(e), "traceback": traceback.format_exc()}
    finally:
        # Cleanup on every path, including failures (previously skipped when
        # an exception was raised).
        if work_dir is not None:
            shutil.rmtree(work_dir, ignore_errors=True)
@app.get("/debug/fonts")
def debug_fonts():
    """Check available fonts on the system.

    Runs ``fc-list : family | sort | uniq`` and returns at most the first 50
    lines under ``fonts`` together with the full line count under ``total``.
    When the command prints nothing, the result is an empty list and a total
    of 0.
    """
    result = subprocess.run(
        "fc-list : family | sort | uniq",
        shell=True, capture_output=True, text=True
    )
    # splitlines() yields [] for empty output, whereas "".split("\n") yields
    # [""] and would be reported as one (blank) font.
    fonts = result.stdout.strip().splitlines()
    return {"fonts": fonts[:50], "total": len(fonts)}  # Limit output
@app.get("/debug/ffmpeg")
def debug_ffmpeg():
    """Report FFmpeg's version banner plus its VP9 encoders and subtitle filters."""
    # Response key -> shell pipeline whose stdout becomes that key's value.
    # Dict order fixes both execution order and the order of the response keys.
    probes = {
        "version": "ffmpeg -version | head -5",
        "vp9_encoders": "ffmpeg -encoders 2>/dev/null | grep vp9",
        "subtitle_filters": "ffmpeg -filters 2>/dev/null | grep -E '(ass|subtitles)'",
    }

    report = {}
    for key, pipeline in probes.items():
        completed = subprocess.run(pipeline, shell=True, capture_output=True, text=True)
        report[key] = completed.stdout
    return report