Spaces:
Paused
Paused
| import os | |
| import uuid | |
| import requests | |
| import tempfile | |
| import shutil | |
| import subprocess | |
| import json | |
| from datetime import datetime | |
| from fastapi import FastAPI, BackgroundTasks | |
| from fastapi.middleware.cors import CORSMiddleware | |
| import uvicorn | |
| from huggingface_hub import HfApi, hf_hub_download | |
| from typing import Optional, List, Dict, Any | |
| import base64 | |
| from io import BytesIO | |
# Startup banner: a 60-character rule above and below the service title.
print("=" * 60, "🚀 TEXT STYLING API - WITH AUTO CAPTION FEATURE", "=" * 60, sep="\n")
| # ============================================= | |
| # TRY TO IMPORT WHISPER | |
| # ============================================= | |
# Whisper is an optional dependency; the service still starts without it.
# WHISPER_MODEL is bound before the import attempt so the name exists in both
# outcomes (previously it was only defined when the import succeeded, leaving
# it undefined for any code that reads it after a failed import).
# The model itself is loaded lazily on first use.
WHISPER_MODEL = None
try:
    import whisper
    WHISPER_AVAILABLE = True
    print("✅ Whisper available for transcription")
except ImportError:
    WHISPER_AVAILABLE = False
    print("⚠️ Whisper not available - transcription disabled")
| # ============================================= | |
| # CONFIGURATION | |
| # ============================================= | |
# Hugging Face access token, read from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")
# Account that owns both datasets used by this service.
HF_USERNAME = "yukee1992"
# Dataset repos: one holding the font files, one receiving uploaded videos.
FONTS_DATASET = HF_USERNAME + "/video-fonts"
VIDEO_DATASET = HF_USERNAME + "/video-project-images"
# Local directory the fonts are downloaded into.
FONTS_DIR = "/tmp/fonts"

print(f"📦 Fonts Dataset: {FONTS_DATASET}")
print(f"📦 Video Dataset: {VIDEO_DATASET}")
print("🔑 HF Token: " + ("✅ Set" if HF_TOKEN else "❌ Missing"))

# Working directories are created at import time.
os.makedirs(FONTS_DIR, exist_ok=True)
os.makedirs("/tmp/styling", exist_ok=True)

# Shared Hugging Face Hub client used for listing and uploading.
api = HfApi(token=HF_TOKEN)
| # ============================================= | |
| # DOWNLOAD FONTS FROM DATASET | |
| # ============================================= | |
def download_all_fonts():
    """Download every font file of the fonts dataset into ``FONTS_DIR``.

    Files ending in .ttf/.otf/.ttc (case-insensitive) are fetched one by one.
    A failure on a single file is logged and skipped so the fonts that did
    download remain usable.

    Returns:
        dict: maps a font key (file name up to the first dot, lower-cased,
        with ``-`` and spaces replaced by ``_``) to a dict holding ``path``,
        ``name`` (file name) and ``display_name``. Empty when the dataset
        listing itself fails.
    """
    fonts = {}
    try:
        files = api.list_repo_files(repo_id=FONTS_DATASET, repo_type="dataset")
        # Ensure directory exists
        os.makedirs(FONTS_DIR, exist_ok=True)
    except Exception as e:
        print(f"❌ Failed to download fonts: {e}")
        import traceback
        traceback.print_exc()
        return fonts

    print(f"📁 Fonts directory: {FONTS_DIR}")
    for file in files:
        if not file.lower().endswith(('.ttf', '.otf', '.ttc')):
            continue
        font_name = os.path.basename(file)
        local_path = os.path.join(FONTS_DIR, font_name)
        print(f"⬇️ Downloading font: {font_name} to {local_path}")
        try:
            downloaded_path = hf_hub_download(
                repo_id=FONTS_DATASET,
                filename=file,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=FONTS_DIR,
                local_dir_use_symlinks=False,
            )
        except Exception as e:
            # Isolate the failure: one bad file must not discard the others.
            print(f"❌ Failed to download font {font_name}: {e}")
            continue
        print(f"✅ Downloaded to: {downloaded_path}")

        # Verify file exists
        if not os.path.exists(downloaded_path):
            print(f"❌ File not found after download: {downloaded_path}")
            continue
        file_size = os.path.getsize(downloaded_path)
        print(f"📊 File size: {file_size} bytes")

        # Spaces are normalised as well as hyphens because get_font_path()
        # turns spaces into underscores before looking a key up.
        font_key = font_name.split('.')[0].lower().replace('-', '_').replace(' ', '_')
        fonts[font_key] = {
            "path": downloaded_path,
            "name": font_name,
            "display_name": os.path.splitext(font_name)[0].replace('-', ' '),
        }

    print(f"✅ Loaded {len(fonts)} fonts")
    print(f"📋 Font keys: {list(fonts.keys())}")

    # Debug: list everything that ended up in FONTS_DIR (best effort).
    try:
        print(f"📁 Files in {FONTS_DIR}:")
        for f in os.listdir(FONTS_DIR):
            print(f" - {f}")
    except OSError as e:
        print(f"⚠️ Could not list {FONTS_DIR}: {e}")
    return fonts


# Download fonts at startup
FONTS = download_all_fonts()
| # ============================================= | |
| # TRY TO IMPORT PIL (for debug endpoints) | |
| # ============================================= | |
# Pillow is optional: without it the image-based font checks are unavailable.
try:
    from PIL import Image, ImageFont, ImageDraw
except ImportError:
    PIL_AVAILABLE = False
    print("⚠️ PIL not available - font testing limited")
else:
    PIL_AVAILABLE = True
    print("✅ PIL available for font testing")

# fontTools is optional: without it the name/cmap analysis is unavailable.
try:
    from fontTools import ttLib
except ImportError:
    FONTTOOLS_AVAILABLE = False
    print("⚠️ fontTools not available - font analysis limited")
else:
    FONTTOOLS_AVAILABLE = True
    print("✅ fontTools available for font analysis")
| # ============================================= | |
| # ENHANCED COLOR MAP | |
| # ============================================= | |
# Colour name -> RRGGBB hex string (no prefix). Insertion order is the order
# in which the names are listed to API clients.
COLOR_MAP = dict(
    # Basic colors
    white="FFFFFF",
    black="000000",
    red="FF0000",
    green="00FF00",
    blue="0000FF",
    yellow="FFFF00",
    # Purple variations
    purple="800080",       # Medium purple
    darkpurple="4B0082",   # Indigo
    lightpurple="9370DB",  # Light purple
    violet="EE82EE",       # Violet
    magenta="FF00FF",      # Magenta
    # Gold variations
    gold="FFD700",         # Gold
    orange="FFA500",       # Orange
    darkgold="B8860B",     # Dark goldenrod
    # Other colors
    cyan="00FFFF",
    pink="FFC0CB",
    brown="A52A2A",
    gray="808080",
    navy="000080",
    teal="008080",
    maroon="800000",
    olive="808000",
    coral="FF7F50",
    lavender="E6E6FA",
    turquoise="40E0D0",
    indigo="4B0082",
    crimson="DC143C",
)
| # ============================================= | |
| # CREATE FASTAPI APP | |
| # ============================================= | |
# The ASGI application object.
app = FastAPI(title="Text Styling API with Auto Caption")

# CORS is fully open: any origin, method and header, with credentials allowed.
# NOTE(review): a wildcard origin combined with credentials is very permissive;
# confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], allow_credentials=True,
    allow_methods=["*"], allow_headers=["*"],
)
| # ============================================= | |
| # PYDANTIC MODELS | |
| # ============================================= | |
| from pydantic import BaseModel | |
# Appearance and placement of a single text overlay (e.g. a title).
class TextStyle(BaseModel):
    # The text to render.
    text: str
    # Font name, resolved against the downloaded fonts.
    font_family: str
    font_size: int = 48
    # Text colour.
    color: str = "white"
    # Background box colour, optionally followed by "@<opacity>".
    bg_color: str = "black@0.5"
    # One of: center, left, right, top-/bottom- + left/center/right.
    position: str = "center"
    # Distance from the frame edge, in pixels.
    margin: int = 20
    # Space between the text and the edge of the background box.
    padding: int = 10
    # Width of text outline (0 = no outline)
    outline_width: int = 0
    # Color of the outline
    outline_color: str = "black"
# Shared appearance of all captions burned into a video.
class CaptionStyle(BaseModel):
    # Font name, resolved against the downloaded fonts.
    font_family: str
    font_size: int = 36
    # Caption text colour.
    color: str = "white"
    # Background box colour, optionally followed by "@<opacity>".
    bg_color: str = "black@0.5"
    # Placement keyword; captions default to the bottom centre.
    position: str = "bottom-center"
    # Distance from the frame edge, in pixels.
    margin: int = 20
    # Space between the text and the edge of the background box.
    padding: int = 8
    # 0 = no max width, otherwise will wrap text
    max_width: int = 0
    # Width of text outline (0 = no outline)
    outline_width: int = 0
    # Color of the outline
    outline_color: str = "black"
# One caption line together with the interval during which it is shown.
class CaptionSegment(BaseModel):
    text: str
    # Both times are offsets from the start of the video, in seconds.
    start_time: float
    end_time: float
# Request body for adding an optional title overlay to a video.
class StylingRequest(BaseModel):
    project_id: str
    # URL the source video is downloaded from.
    video_url: str
    # When omitted, no overlay is drawn.
    title_overlay: Optional[TextStyle] = None
# Request body for burning caller-supplied, pre-timed captions into a video.
class CaptionRequest(BaseModel):
    project_id: str
    # URL the source video is downloaded from.
    video_url: str
    # Captions with their display intervals.
    captions: List[CaptionSegment]
    # Style applied to every caption.
    caption_style: CaptionStyle
    title_overlay: Optional[TextStyle] = None
# Request body for transcribing audio and captioning a video automatically.
class TranscriptionRequest(BaseModel):
    project_id: str
    video_url: str
    # If not provided, will use video audio
    audio_url: Optional[str] = None
    caption_style: CaptionStyle
    title_overlay: Optional[TextStyle] = None
    # Default to Chinese, use "en" for English
    language: str = "zh"
# Result of a styling request: "success" carries the URL, "error" the message.
class StylingResponse(BaseModel):
    status: str
    project_id: str
    styled_video_url: Optional[str] = None
    error: Optional[str] = None
# Result of a caption request.
class CaptionResponse(BaseModel):
    status: str
    project_id: str
    # Location of the captioned video, when one was produced.
    captioned_video_url: Optional[str] = None
    error: Optional[str] = None
    caption_count: Optional[int] = None
# Result of a transcription request.
class TranscriptionResponse(BaseModel):
    status: str
    project_id: str
    # Location of the captioned video, when one was produced.
    captioned_video_url: Optional[str] = None
    transcript: Optional[str] = None
    caption_count: Optional[int] = None
    error: Optional[str] = None
# Request body for captioning a video from an existing SRT subtitle file.
class SrtCaptionRequest(BaseModel):
    project_id: str
    video_url: str
    # Direct URL to SRT file
    srt_url: str
    caption_style: CaptionStyle
    title_overlay: Optional[TextStyle] = None
| # ============================================= | |
| # HELPER FUNCTIONS | |
| # ============================================= | |
def download_file(url, local_path, timeout=(10, 120)):
    """Stream ``url`` to ``local_path`` and return ``local_path``.

    Args:
        url: HTTP(S) address of the file.
        local_path: Destination path; overwritten if it exists.
        timeout: Passed to ``requests.get`` as (connect, read) seconds. The
            read value bounds the wait between chunks, not the whole
            transfer, so large files are still allowed.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx status.
    """
    # The context manager releases the pooled connection even when the write
    # fails part-way; without a timeout a stalled server would hang forever.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(local_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    return local_path
def upload_to_dataset(file_path, project_id, filename, subfolder="videos"):
    """Upload a local file into the video dataset and return its public URL.

    The file is stored at ``data/projects/<project_id>/<subfolder>/<filename>``.
    Returns None when no HF token is configured or when the upload raises.
    """
    # No token configured: nothing is uploaded.
    if not HF_TOKEN:
        return None

    try:
        dataset_path = f"data/projects/{project_id}/{subfolder}/{filename}"
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=dataset_path,
            repo_id=VIDEO_DATASET,
            repo_type="dataset",
        )
    except Exception as upload_error:
        print(f"❌ Upload failed: {upload_error}")
        return None
    else:
        return f"https://huggingface.co/datasets/{VIDEO_DATASET}/resolve/main/{dataset_path}"
def get_font_path(font_family):
    """Resolve a font family name to the path of a downloaded font file.

    Lookup order: exact key match in ``FONTS``, then substring match against
    font keys and file names, then a scan of ``FONTS_DIR`` for font files.
    Case, spaces and hyphens are ignored on both sides of every comparison.

    Returns:
        The path of an existing font file, or None when nothing matches
        (including when ``font_family`` is empty).
    """
    def _normalize(value):
        return value.lower().replace(' ', '_').replace('-', '_')

    # An empty name is a substring of everything and would match any font.
    if not font_family or not font_family.strip():
        print(f"❌ Font not found: {font_family}")
        return None

    wanted = _normalize(font_family.strip())
    print(f"🔍 Looking for font: {font_family}")
    print(f"📋 Available fonts: {list(FONTS.keys())}")

    # Try exact match with our font keys
    for key, font_info in FONTS.items():
        path = font_info["path"]
        if _normalize(key) == wanted and os.path.exists(path):
            print(f"✅ Found exact match: {path}")
            return path

    # Try partial matches with font keys; skip entries whose file has vanished
    for key, font_info in FONTS.items():
        if wanted in _normalize(key) or wanted in _normalize(font_info["name"]):
            path = font_info["path"]
            if os.path.exists(path):
                print(f"✅ Found partial match: {key} -> {path}")
                return path

    # Scan directories for font files. Only font extensions qualify so that
    # unrelated files sitting in the directory tree are never returned.
    print("🔍 Scanning directories for font files...")
    for root, _dirs, files in os.walk(FONTS_DIR):
        for file in files:
            if not file.lower().endswith(('.ttf', '.otf', '.ttc')):
                continue
            if wanted in _normalize(file):
                full_path = os.path.join(root, file)
                print(f"✅ Found font file: {full_path}")
                return full_path

    print(f"❌ Font not found: {font_family}")
    return None
def get_font_family_name(font_path):
    """Return the family name (name ID 1) stored in a font file.

    Windows/Unicode name records are tried first, then any other record,
    each decoded with the encoding fontTools derives for it. Falls back to
    the file name without extension when fontTools is missing, the file
    cannot be read, or no record decodes to a non-empty string.
    """
    fallback = os.path.splitext(os.path.basename(font_path))[0]
    try:
        from fontTools import ttLib
        font = ttLib.TTFont(font_path)
        try:
            family_records = [r for r in font['name'].names if r.nameID == 1]
        finally:
            font.close()

        # Stable sort: Windows Unicode (platform 3, encoding 1) records first.
        family_records.sort(key=lambda r: not (r.platformID == 3 and r.platEncID == 1))
        for record in family_records:
            try:
                name = record.toUnicode()
            except (UnicodeError, LookupError, TypeError):
                # Undecodable record: try the next one.
                continue
            if name:
                return name
        return fallback
    except Exception as e:
        print(f"⚠️ Could not extract font family name: {e}")
        return fallback
def extract_audio_from_video(video_path: str, output_audio_path: str) -> bool:
    """Extract the audio track of a video into a mono 16 kHz MP3 via ffmpeg.

    Returns:
        True when ffmpeg exits with status 0; False when it fails or when
        the ffmpeg executable cannot be started.
    """
    cmd = [
        'ffmpeg', '-y',
        '-i', video_path,
        '-vn',  # No video
        '-acodec', 'mp3',
        '-ar', '16000',  # Whisper works best with 16kHz
        '-ac', '1',  # Mono
        output_audio_path
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as e:
        # Raised when the ffmpeg binary is missing or not executable; report
        # it as a failed extraction, matching the documented bool contract.
        print(f"❌ Audio extraction failed: {e}")
        return False

    if result.returncode == 0:
        print(f"✅ Audio extracted to: {output_audio_path}")
        return True
    print(f"❌ Audio extraction failed: {result.stderr}")
    return False
def load_whisper_model():
    """Return the shared Whisper "base" model, loading it on first use.

    Returns:
        The loaded model, or None when Whisper is not installed.
    """
    global WHISPER_MODEL
    # WHISPER_MODEL is only bound at import time when whisper imported
    # successfully, so it must not be read on the unavailable path.
    if not WHISPER_AVAILABLE:
        return None
    if globals().get("WHISPER_MODEL") is None:
        print("🎤 Loading Whisper model (this may take a moment)...")
        WHISPER_MODEL = whisper.load_model("base")
        print("✅ Whisper model loaded")
    return WHISPER_MODEL
def transcribe_audio_to_captions(audio_path: str, language: str = "zh") -> List[CaptionSegment]:
    """Transcribe an audio file with Whisper into timed caption segments.

    Args:
        audio_path: Path of the audio file to transcribe.
        language: Language code passed to Whisper; "auto" lets Whisper
            detect the language.

    Returns:
        One CaptionSegment per non-empty Whisper segment, with whitespace
        collapsed and the segment's start/end times.

    Raises:
        Exception: when Whisper is not installed.
    """
    if not WHISPER_AVAILABLE:
        raise Exception("Whisper not available - cannot transcribe")
    print(f"🎤 Transcribing audio with Whisper...")

    model = load_whisper_model()
    result = model.transcribe(
        audio_path,
        language=language if language != "auto" else None,
        word_timestamps=True,
        task="transcribe"
    )

    captions = []
    for segment in result["segments"]:
        # Collapse runs of whitespace and drop empty segments.
        text = " ".join(segment["text"].strip().split())
        if text:
            captions.append(CaptionSegment(
                text=text,
                start_time=segment["start"],
                end_time=segment["end"]
            ))
    print(f"✅ Generated {len(captions)} caption segments")

    # Debug dump of the raw result. The name is derived from the stem so it
    # can never equal audio_path (a plain ".mp3" replace left the path
    # unchanged for other extensions and overwrote the audio file).
    transcript_path = os.path.splitext(audio_path)[0] + "_transcript.json"
    try:
        with open(transcript_path, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
    except (OSError, TypeError, ValueError) as e:
        # Best effort only: a failed dump must not discard the captions.
        print(f"⚠️ Could not save transcript: {e}")
    return captions
def format_ass_time(seconds: float) -> str:
    """Convert seconds to an ASS timestamp (H:MM:SS.cc).

    The value is rounded to the nearest centisecond before being split into
    fields, so binary float error cannot drop a centisecond (0.57 s used to
    come out as ".56") and a carry propagates correctly (59.999 s becomes
    0:01:00.00). Negative input is clamped to zero.
    """
    total_centiseconds = max(0, int(round(seconds * 100)))
    whole_seconds, centiseconds = divmod(total_centiseconds, 100)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours}:{minutes:02d}:{secs:02d}.{centiseconds:02d}"
def wrap_text_for_ass(text: str, max_width: int, font_family: str, font_size: int) -> str:
    """Wrap text for ASS subtitles, joining lines with the ASS break ``\\N``.

    Width is estimated from the character count, assuming each character is
    about ``font_size / 2`` pixels wide. ``font_family`` is accepted for
    interface compatibility but is not used by the estimate.

    Lines are filled greedily at word boundaries. A single word longer than
    a line (typical for unspaced text such as Chinese) is split at the
    character limit instead of being left over-long.

    Returns:
        ``text`` unchanged when ``max_width <= 0`` or it already fits,
        otherwise the wrapped text.
    """
    if max_width <= 0:
        return text

    # Clamp both values to 1 so tiny font sizes cannot divide by zero and a
    # tiny max_width cannot produce a zero-length line budget.
    avg_char_width = max(1, font_size // 2)
    max_chars = max(1, max_width // avg_char_width)
    if len(text) <= max_chars:
        return text

    lines = []
    current = ""
    for word in text.split():
        candidate = f"{current} {word}" if current else word
        if len(candidate) <= max_chars:
            current = candidate
            continue
        if current:
            lines.append(current)
        # Hard-split a word that cannot fit on a line by itself.
        while len(word) > max_chars:
            lines.append(word[:max_chars])
            word = word[max_chars:]
        current = word
    if current:
        lines.append(current)
    return "\\N".join(lines)
| # ============================================= | |
| # SRT PARSING FUNCTIONS | |
| # ============================================= | |
def parse_srt_file(srt_content: str) -> List[CaptionSegment]:
    """Parse SRT subtitle text into a list of CaptionSegment.

    Accepts LF, CRLF and bare CR line endings, a leading byte-order mark,
    and blocks separated by one or more blank (or whitespace-only) lines.
    The timing line is looked for in the first two lines of each block, so
    blocks without an index number are accepted too. Blocks with no timing
    line, an unparsable timestamp or no text are skipped.
    """
    import re

    captions = []
    normalized = srt_content.lstrip('\ufeff').replace('\r\n', '\n').replace('\r', '\n')

    def _first_token(value):
        # Drops anything after the timestamp (e.g. cue position settings).
        tokens = value.split()
        return tokens[0] if tokens else ''

    for block in re.split(r'\n\s*\n', normalized.strip()):
        lines = [line.strip() for line in block.strip().split('\n')]
        timing_index = next(
            (i for i, line in enumerate(lines[:2]) if '-->' in line), None
        )
        if timing_index is None:
            continue

        # Parse timestamp format: 00:00:01,234 --> 00:00:04,567
        start_raw, _, end_raw = lines[timing_index].partition('-->')
        try:
            start_time = srt_time_to_seconds(_first_token(start_raw))
            end_time = srt_time_to_seconds(_first_token(end_raw))
        except ValueError:
            # One malformed block must not abort the whole file.
            print(f"⚠️ Skipping SRT block with invalid timestamp: {lines[timing_index]}")
            continue

        # Text is everything after timestamp (may be multiple lines)
        text = ' '.join(lines[timing_index + 1:]).strip()
        if text:
            captions.append(CaptionSegment(
                text=text,
                start_time=start_time,
                end_time=end_time
            ))
    print(f"📊 Parsed {len(captions)} captions from SRT")
    return captions
def srt_time_to_seconds(time_str: str) -> float:
    """Convert an SRT timestamp to seconds.

    Accepts ``HH:MM:SS,mmm`` or ``HH:MM:SS.mmm`` and the shorter
    ``MM:SS,mmm`` form. Surrounding whitespace and anything after the first
    whitespace-separated token (such as cue position settings) is ignored.

    Returns:
        The time in seconds, or 0.0 when the string does not have two or
        three colon-separated fields.

    Raises:
        ValueError: when a field is not numeric.
    """
    tokens = time_str.strip().split()
    if not tokens:
        return 0.0

    parts = tokens[0].replace(',', '.').split(':')
    if len(parts) == 3:
        hours, minutes, seconds = int(parts[0]), int(parts[1]), float(parts[2])
    elif len(parts) == 2:
        hours, minutes, seconds = 0, int(parts[0]), float(parts[1])
    else:
        return 0.0
    return hours * 3600 + minutes * 60 + seconds
| # ============================================= | |
| # TEXT OVERLAY FUNCTION | |
| # ============================================= | |
def create_text_overlay(input_video, output_video, text_style):
    """Burn a text overlay into a video with ffmpeg's drawtext filter.

    Args:
        input_video: Path of the source video.
        output_video: Path the rendered video is written to.
        text_style: Object exposing text, font_family, font_size, color,
            bg_color ("<color>" or "<color>@<opacity>"), position, margin,
            padding, outline_width and outline_color.

    Returns:
        True when ffmpeg succeeds; False when the font cannot be resolved,
        ffmpeg cannot be started, or ffmpeg exits with an error.
    """
    font_path = get_font_path(text_style.font_family)
    if not font_path:
        print(f"⚠️ Font not found: {text_style.font_family}")
        return False
    print(f"✅ Using font: {font_path}")

    def _ffmpeg_color(value):
        # Names listed in COLOR_MAP are sent as 0xRRGGBB so every advertised
        # colour works, including names ffmpeg does not know ("darkgold").
        # Note COLOR_MAP's "green" is 00FF00. Other values pass through.
        name, sep, alpha = str(value).partition('@')
        hex_value = COLOR_MAP.get(name.strip().lower())
        if hex_value is None:
            return value
        return f"0x{hex_value}{sep}{alpha}"

    # Parse background colour; a missing or malformed opacity falls back to 0.5.
    bg_name, _, bg_alpha = text_style.bg_color.partition('@')
    try:
        bg_opacity = float(bg_alpha) if bg_alpha else 0.5
    except ValueError:
        bg_opacity = 0.5
    bg_color = _ffmpeg_color(bg_name.lower())

    # Map position to drawtext position with margin
    margin = text_style.margin
    drawtext_pos = {
        "bottom-left": f"x={margin}:y=h-th-{margin}",
        "bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
        "bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
        "center": "x=(w-tw)/2:y=(h-th)/2",
        "left": f"x={margin}:y=(h-th)/2",
        "right": f"x=w-tw-{margin}:y=(h-th)/2",
        "top-left": f"x={margin}:y={margin}",
        "top-center": f"x=(w-tw)/2:y={margin}",
        "top-right": f"x=w-tw-{margin}:y={margin}"
    }
    position = drawtext_pos.get(text_style.position, "x=(w-tw)/2:y=(h-th)/2")

    # The text is handed to ffmpeg through a file rather than interpolated
    # into the filter string: quotes, colons, commas or backslashes in user
    # text would otherwise break (or alter) the filter. expansion=none keeps
    # '%' sequences literal.
    text_file = tempfile.NamedTemporaryFile(
        mode='w', suffix='.txt', encoding='utf-8', delete=False
    )
    try:
        with text_file:
            text_file.write(text_style.text)

        # borderw = outline width, bordercolor = outline color
        drawtext_filter = ":".join([
            f"drawtext=textfile={text_file.name}",
            "expansion=none",
            f"fontfile={font_path}",
            f"fontsize={text_style.font_size}",
            f"fontcolor={_ffmpeg_color(text_style.color)}",
            position,
            "box=1",
            f"boxcolor={bg_color}@{bg_opacity}",
            f"boxborderw={text_style.padding}",
            f"borderw={text_style.outline_width}",
            f"bordercolor={_ffmpeg_color(text_style.outline_color)}",
        ])
        drawtext_cmd = [
            'ffmpeg', '-y',
            '-i', input_video,
            '-vf', drawtext_filter,
            '-c:a', 'copy',
            output_video
        ]
        print(f"🎬 Running drawtext command with outline={text_style.outline_width}, bordercolor={text_style.outline_color}")
        try:
            result = subprocess.run(drawtext_cmd, capture_output=True, text=True)
        except OSError as e:
            # ffmpeg binary missing or not executable.
            print(f"❌ Drawtext failed: {e}")
            return False
    finally:
        try:
            os.remove(text_file.name)
        except OSError:
            pass

    if result.returncode == 0:
        print(f"✅ Drawtext method succeeded with background and outline")
        return True
    print(f"❌ Drawtext failed: {result.stderr}")
    return False
| # ============================================= | |
| # DEBUG ENDPOINTS | |
| # ============================================= | |
async def debug_characters(font_name: str):
    """Render sample strings with a font so missing glyphs can be spotted.

    The font is the first entry of FONTS whose key or file name contains
    ``font_name``. Each sample is drawn on its own 1200x400 white image and
    returned as a base64-encoded PNG.
    """
    if not PIL_AVAILABLE:
        return {"error": "PIL not installed - cannot test characters"}
    try:
        font_path = next(
            (
                entry["path"]
                for key, entry in FONTS.items()
                if font_name in key or font_name in entry["name"]
            ),
            None,
        )
        if not font_path:
            return {"error": f"Font {font_name} not found"}

        # (label, sample text) pairs covering different character sets.
        test_texts = [
            ("English", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
            ("Numbers", "0123456789"),
            ("Common Chinese", "的一是在不了有和人这中大为上个国我以要他时来用们生到作地于出就分对成会可主发年动同工也能下过子说产种面而方后多定行学法所民得经十三之进等着部度家电力里如水化高自二理起小物现实加量都两体制机当使点从业本去把性好应开它合还因由其些然前外天政四日那社义事平形相全表间样与关各重新线内数正心反你明看原又么利比或但质气第向道命此变条只没结解问意建月公无系军很情者最立代想已通并提直题党程展五果料象员革位入常文总次品式活设及管特件长求老头基资边流路级少图山统接知较将组见计别她手角期根论运农指几九区强放决西被干做必战先回则任取据处队南给色光门即保治北造百规热领七海口东导器压志世金增争济阶油思术极交受联什认六共权收证改清己美再采转更单风切打白教速花带安场身车例真务具万每目至达走积示议声报斗完类八离华名确才科张信马节话米整空元况今集温传土许步群广石记需段研界拉林律叫且究观越织装影算低持音众书布复容儿须际商非验连断深难近矿千周委素技备半办青省列习响约支般史感劳便团往酸历市克何除消构府称太准精值号率族维划选标写存候毛亲快效斯院查江型眼王按格养易置派层片始却专状育厂京识适属圆包火住调满县局照参红细引听该铁价严龙飞"),
            ("Test Phrase", "荆南麦圆体测试"),
            ("Your Text", font_name)
        ]

        def _render_sample(label, text):
            # Draw the label and up to 200 characters of the sample; if the
            # font fails, draw the error with PIL's default font instead.
            canvas = Image.new('RGB', (1200, 400), color='white')
            pen = ImageDraw.Draw(canvas)
            try:
                face = ImageFont.truetype(font_path, 24)
                pen.text((10, 10), f"{label}:", fill='black', font=face)
                pen.text((10, 50), str(text)[:200], fill='black', font=face)
            except Exception as e:
                pen.text((10, 10), f"Error: {str(e)}", fill='red', font=ImageFont.load_default())
            png = BytesIO()
            canvas.save(png, format="PNG")
            return base64.b64encode(png.getvalue()).decode()

        images = [_render_sample(label, text) for label, text in test_texts]
        return {
            "font_name": font_name,
            "font_path": font_path,
            "font_size_bytes": os.path.getsize(font_path),
            "test_images": images,
            "message": "If Chinese characters appear as boxes, the font lacks those glyphs"
        }
    except Exception as e:
        return {"error": str(e)}
async def font_info(font_name: str):
    """Report details about a downloaded font.

    The font is the first entry of FONTS whose key or file name contains
    ``font_name``. The result always carries the path, file size and which
    optional libraries are available. With PIL it adds whether the font
    loads; with fontTools it adds the name-table strings, a sampled check
    for Chinese glyphs and the total glyph count. Errors are returned as
    ``{"error": ...}`` or as ``pil_error`` / ``fonttools_error`` fields.
    """
    try:
        font_path = None
        for key, entry in FONTS.items():
            if font_name in key or font_name in entry["name"]:
                font_path = entry["path"]
                break
        if not font_path:
            return {"error": f"Font {font_name} not found"}

        info = {
            "font_name": font_name,
            "path": font_path,
            "size_bytes": os.path.getsize(font_path),
            "pil_available": PIL_AVAILABLE,
            "fonttools_available": FONTTOOLS_AVAILABLE
        }

        if PIL_AVAILABLE:
            try:
                ImageFont.truetype(font_path, 20)
                info["pil_loads"] = True
            except Exception as e:
                info["pil_loads"] = False
                info["pil_error"] = str(e)

        if FONTTOOLS_AVAILABLE:
            try:
                from fontTools import ttLib
                font = ttLib.TTFont(font_path)
                try:
                    # name ID -> key used in the response
                    name_fields = {1: 'family', 2: 'subfamily', 4: 'full', 6: 'postscript'}
                    name_records = {}
                    for record in font['name'].names:
                        field = name_fields.get(record.nameID)
                        if field is None:
                            continue
                        try:
                            # Decode with the record's own encoding rather
                            # than assuming UTF-16-BE; later records
                            # overwrite earlier ones.
                            name_records[field] = record.toUnicode()
                        except (UnicodeError, LookupError, TypeError):
                            continue
                    info["names"] = name_records

                    # getBestCmap() returns None when no Unicode cmap exists.
                    cmap = font.getBestCmap() or {}
                    chinese_ranges = [
                        (0x4E00, 0x9FFF),
                        (0x3400, 0x4DBF),
                        (0x20000, 0x2A6DF),
                    ]
                    # Only every 100th code point of the first 1000 in each
                    # range is probed, so the count is a coarse hint.
                    chinese_count = sum(
                        1
                        for start, end in chinese_ranges
                        for code in range(start, min(start + 1000, end), 100)
                        if code in cmap
                    )
                    info["has_chinese_glyphs"] = chinese_count > 0
                    info["approx_chinese_glyphs"] = chinese_count * 100
                    info["total_glyphs"] = len(font.getGlyphOrder())
                finally:
                    font.close()
            except Exception as e:
                info["fonttools_error"] = str(e)
        return info
    except Exception as e:
        return {"error": str(e)}
async def debug_font_list():
    """Return id, display name, file name, path and size (KB) of every font."""
    return {
        "fonts": [
            {
                "id": font_id,
                "name": entry["display_name"],
                "file": entry["name"],
                "path": entry["path"],
                # Size in kilobytes, one decimal place.
                "size_kb": round(os.path.getsize(entry["path"]) / 1024, 1),
            }
            for font_id, entry in FONTS.items()
        ]
    }
async def test_chars(font_name: str):
    """Render a few short sample strings with a font and report the outcome.

    For every sample the response carries the label, the text and the first
    100 characters of the base64 PNG (or the error raised while drawing).
    The output of ``fc-list`` for the font path is included as ``font_info``.
    """
    try:
        font_path = next(
            (
                entry["path"]
                for key, entry in FONTS.items()
                if font_name in key or font_name in entry["name"]
            ),
            None,
        )
        if not font_path:
            return {"error": f"Font {font_name} not found"}

        # Imported here so a missing dependency is reported as an error dict.
        from PIL import Image, ImageFont, ImageDraw
        import base64
        from io import BytesIO

        samples = (
            ("English", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
            ("Numbers", "0123456789"),
            ("Basic Chinese", "的一是了不在有"),
            ("Your Text", "荆南麦圆体测试"),
            ("Common Characters", "你我他中文测试"),
        )

        results = []
        for label, text in samples:
            canvas = Image.new('RGB', (800, 200), color='white')
            pen = ImageDraw.Draw(canvas)
            try:
                face = ImageFont.truetype(font_path, 36)
                pen.text((10, 50), f"{label}: {text}", fill='black', font=face)
                png = BytesIO()
                canvas.save(png, format="PNG")
                encoded = base64.b64encode(png.getvalue()).decode()
            except Exception as e:
                results.append({"label": label, "error": str(e)})
            else:
                # Only a short prefix of the image is returned.
                results.append({
                    "label": label,
                    "text": text,
                    "image": encoded[:100] + "..."
                })

        import subprocess
        fc_list = subprocess.run(['fc-list', font_path], capture_output=True, text=True)
        return {
            "font_name": font_name,
            "font_path": font_path,
            "font_size": os.path.getsize(font_path),
            "font_info": fc_list.stdout,
            "test_results": results
        }
    except Exception as e:
        return {"error": str(e)}
async def test_transcription(audio_url: str, language: str = "zh"):
    """Download an audio file, transcribe it and return a short preview.

    The response holds the caption count, the first ten captions and the
    first 500 characters of the joined transcript followed by "...".
    Any failure is returned as ``{"error": ...}``.
    """
    if not WHISPER_AVAILABLE:
        return {"error": "Whisper not available - cannot test transcription"}
    try:
        work_dir = "/tmp/transcript_test"
        os.makedirs(work_dir, exist_ok=True)
        audio_path = os.path.join(work_dir, "test_audio.mp3")
        download_file(audio_url, audio_path)

        captions = transcribe_audio_to_captions(audio_path, language)

        preview = []
        for caption in captions[:10]:
            preview.append({
                "text": caption.text,
                "start": caption.start_time,
                "end": caption.end_time
            })

        if captions:
            full_transcript = " ".join(c.text for c in captions)[:500] + "..."
        else:
            full_transcript = ""

        return {
            "status": "success",
            "caption_count": len(captions),
            "captions": preview,
            "full_transcript": full_transcript
        }
    except Exception as e:
        return {"error": str(e)}
async def test_srt_parsing(project_id: str, srt_filename: str):
    """Fetch an SRT file from the video dataset and preview its parsed captions.

    The file is read from
    ``data/projects/<project_id>/subtitles/<srt_filename>``. The response
    holds the caption count and the first five captions, with text longer
    than 50 characters truncated. Failures are returned as ``{"error": ...}``.
    """
    try:
        srt_url = f"https://huggingface.co/datasets/{VIDEO_DATASET}/resolve/main/data/projects/{project_id}/subtitles/{srt_filename}"
        # A timeout keeps a stalled download from blocking the request
        # indefinitely; a timeout error is reported through the except below.
        response = requests.get(srt_url, timeout=30)
        if response.status_code != 200:
            return {"error": f"Failed to download SRT: {response.status_code}"}

        captions = parse_srt_file(response.text)
        return {
            "status": "success",
            "filename": srt_filename,
            "caption_count": len(captions),
            "preview": [
                {
                    "index": i,
                    "text": c.text[:50] + "..." if len(c.text) > 50 else c.text,
                    "start": c.start_time,
                    "end": c.end_time
                }
                for i, c in enumerate(captions[:5])
            ]
        }
    except Exception as e:
        return {"error": str(e)}
async def list_colors():
    """Return every colour name in COLOR_MAP plus a fixed grouping of examples."""
    purple_names = ["purple", "darkpurple", "lightpurple", "violet", "magenta"]
    gold_names = ["gold", "orange", "darkgold"]
    basic_names = ["white", "black", "red", "green", "blue", "yellow"]
    other_names = [
        "cyan", "pink", "brown", "gray", "navy", "teal", "maroon",
        "olive", "coral", "lavender", "turquoise", "indigo", "crimson",
    ]
    return {
        "colors": [name for name in COLOR_MAP],
        "examples": {
            "purple_variations": purple_names,
            "gold_variations": gold_names,
            "basic": basic_names,
            "others": other_names,
        },
    }
async def test_ass_colors():
    """Write a small ASS file with two coloured dialogue lines and return it.

    The file is written to /tmp/ass_test/test_colors.ass and read back; the
    content that was read is returned. Failures come back as ``{"error": ...}``.
    """
    try:
        work_dir = "/tmp/ass_test"
        os.makedirs(work_dir, exist_ok=True)

        # One list entry per line of the ASS script.
        ass_content = "\n".join([
            "[Script Info]",
            "; Script generated for color testing",
            "ScriptType: v4.00+",
            "PlayResX: 1920",
            "PlayResY: 1080",
            "[V4+ Styles]",
            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
            "Style: Default,Arial,48,&H00FFFFFF,&H000000FF,&H00000000,&H99FF00FF,0,0,0,0,100,100,0,0,3,10,0,2,10,10,10,1",
            "[Events]",
            "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
            "Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Magenta (should be pink/purple): &H99FF00FF",
            "Dialogue: 0,0:00:05.00,0:00:10.00,Default,,0,0,0,,Dark Purple (should be indigo): &H9982004B",
        ])

        ass_file = os.path.join(work_dir, "test_colors.ass")
        with open(ass_file, 'w', encoding='utf-8') as out:
            out.write(ass_content)
        # Read the file back so the response reflects what is on disk.
        with open(ass_file, 'r', encoding='utf-8') as src:
            written = src.read()

        return {
            "status": "success",
            "message": "Test ASS file created",
            "ass_content": written,
            "note": "Check your Space logs for the actual color values being used"
        }
    except Exception as e:
        return {"error": str(e)}
| # ============================================= | |
| # MAIN API ENDPOINTS | |
| # ============================================= | |
async def health():
    """Report service status and which optional capabilities are present."""
    return dict(
        status="healthy",
        fonts_loaded=len(FONTS),
        colors_available=len(COLOR_MAP),
        pil_available=PIL_AVAILABLE,
        fonttools_available=FONTTOOLS_AVAILABLE,
        whisper_available=WHISPER_AVAILABLE,
    )
async def list_fonts():
    """Return the keys of all downloaded fonts."""
    font_keys = [key for key in FONTS]
    return {"fonts": font_keys}
async def style_video(request: StylingRequest):
    """Add title overlay to video.

    Downloads the video into a per-request work directory, applies the
    optional title overlay, uploads the result to the video dataset and
    returns a StylingResponse with status "success" and the URL, or status
    "error" and a message. The work directory is removed on every exit
    path, including overlay failure and unexpected exceptions.
    """
    work_dir = None
    try:
        print(f"\n🎨 Styling video for project: {request.project_id}")
        work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
        os.makedirs(work_dir, exist_ok=True)

        video_path = os.path.join(work_dir, "input.mp4")
        download_file(request.video_url, video_path)
        current_video = video_path

        if request.title_overlay:
            titled_path = os.path.join(work_dir, "titled.mp4")
            if not create_text_overlay(current_video, titled_path, request.title_overlay):
                return StylingResponse(
                    status="error",
                    project_id=request.project_id,
                    error="Failed to add text overlay"
                )
            current_video = titled_path

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        styled_filename = f"styled_{timestamp}.mp4"
        styled_url = upload_to_dataset(current_video, request.project_id, styled_filename, "videos")

        if styled_url:
            return StylingResponse(
                status="success",
                project_id=request.project_id,
                styled_video_url=styled_url
            )
        return StylingResponse(
            status="error",
            project_id=request.project_id,
            error="Failed to upload styled video"
        )
    except Exception as e:
        print(f"❌ Error: {e}")
        return StylingResponse(
            status="error",
            project_id=request.project_id,
            error=str(e)
        )
    finally:
        # Runs after the upload has finished and on every failure path, so
        # downloaded and rendered videos never pile up under /tmp/styling.
        if work_dir:
            shutil.rmtree(work_dir, ignore_errors=True)
def _caption_position(style):
    """Return the drawtext ``x=...:y=...`` placement for ``style.position``.

    ``style.margin`` is the distance kept from the frame edges. A position
    that is not one of the keys below falls back to bottom-center.
    """
    margin = style.margin
    placements = {
        "bottom-left": f"x={margin}:y=h-th-{margin}",
        "bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
        "bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
        "center": "x=(w-tw)/2:y=(h-th)/2",
        "left": f"x={margin}:y=(h-th)/2",
        "right": f"x=w-tw-{margin}:y=(h-th)/2",
        "top-left": f"x={margin}:y={margin}",
        "top-center": f"x=(w-tw)/2:y={margin}",
        "top-right": f"x=w-tw-{margin}:y={margin}",
    }
    return placements.get(style.position, f"x=(w-tw)/2:y=h-th-{margin}")


def _burn_captions(video_path, work_dir, captions, style, font_path, position, log_suffix=""):
    """Burn captions into a video, one ffmpeg drawtext pass per caption.

    Args:
        video_path: Path of the source video.
        work_dir: Scratch directory; receives ``step_NNN.mp4`` and
            ``caption_NNN.txt`` files.
        captions: Objects exposing ``text``, ``start_time`` and ``end_time``.
        style: Caption style (font size, colors, padding, outline).
        font_path: Font file handed to drawtext.
        position: Placement string produced by ``_caption_position``.
        log_suffix: Extra text appended to the per-caption progress line.

    Returns:
        ``(output_path, None)`` on success (``output_path`` is ``video_path``
        when nothing was rendered) or ``(None, error_message)`` when ffmpeg
        exits with a non-zero status.
    """
    # ffmpeg runs with cwd=work_dir (see below), so pin the font location
    # first; a relative font path must keep pointing at the same file.
    font_file = os.path.abspath(font_path)
    current_input = video_path
    total = len(captions)

    for i, caption in enumerate(captions):
        # Nothing to draw for a blank caption; skip the re-encode entirely.
        if not caption.text or not caption.text.strip():
            continue

        temp_output = os.path.join(work_dir, f"step_{i:03d}.mp4")

        # The text goes through a file instead of an inline text='...' value:
        # quotes, colons, backslashes or '%' in a caption would otherwise be
        # parsed as filter syntax (breaking the command or injecting options).
        # expansion=none makes drawtext render the file content literally.
        text_name = f"caption_{i:03d}.txt"
        with open(os.path.join(work_dir, text_name), "w", encoding="utf-8") as text_file:
            text_file.write(caption.text)

        drawtext_filter = (
            f"drawtext=textfile={text_name}:"
            "expansion=none:"
            f"fontfile={font_file}:"
            f"fontsize={style.font_size}:"
            f"fontcolor={style.color}:"
            f"{position}:"
            "box=1:"
            f"boxcolor={style.bg_color}:"
            f"boxborderw={style.padding}:"
            f"borderw={style.outline_width}:"
            f"bordercolor={style.outline_color}:"
            f"enable='between(t,{caption.start_time},{caption.end_time})'"
        )
        # NOTE: a video filter forces a video re-encode on every pass; only
        # the audio stream is copied.
        cmd = [
            'ffmpeg', '-y',
            '-i', current_input,
            '-vf', drawtext_filter,
            '-c:a', 'copy',
            temp_output
        ]
        print(f"🎬 Adding caption {i+1}/{total}: '{caption.text}'{log_suffix}")
        # cwd=work_dir lets the filter reference the text file by bare name,
        # so the directory path never has to be escaped for the filtergraph.
        result = subprocess.run(cmd, capture_output=True, text=True, cwd=work_dir)
        if result.returncode != 0:
            print(f"❌ FFmpeg error at caption {i+1}: {result.stderr}")
            # ffmpeg's stderr starts with its version banner; the actual
            # failure reason is at the end, so report the tail.
            return None, f"Failed to add caption {i+1}: {result.stderr[-200:]}"
        current_input = temp_output

    return current_input, None


async def add_captions_to_video(request: CaptionRequest):
    """Add pre-defined captions to video.

    Downloads ``request.video_url``, burns ``request.captions`` in with
    ``request.caption_style``, optionally adds ``request.title_overlay``,
    uploads the result and returns a ``CaptionResponse``. Failures are
    reported as ``status="error"`` responses rather than raised. The scratch
    directory is removed on every exit path.
    """
    work_dir = None
    try:
        print(f"\n📝 Adding {len(request.captions)} captions to video for project: {request.project_id}")
        work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
        os.makedirs(work_dir, exist_ok=True)
        video_path = os.path.join(work_dir, "input.mp4")
        download_file(request.video_url, video_path)

        style = request.caption_style
        font_path = get_font_path(style.font_family)
        if not font_path:
            return CaptionResponse(
                status="error",
                project_id=request.project_id,
                error=f"Caption font not found: {style.font_family}"
            )

        final_video, failure = _burn_captions(
            video_path,
            work_dir,
            request.captions,
            style,
            font_path,
            _caption_position(style),
            log_suffix=f" with outline={style.outline_width}",
        )
        if failure:
            return CaptionResponse(
                status="error",
                project_id=request.project_id,
                error=failure
            )

        if request.title_overlay:
            titled_path = os.path.join(work_dir, "titled.mp4")
            if create_text_overlay(final_video, titled_path, request.title_overlay):
                final_video = titled_path
            else:
                print("⚠️ Title overlay failed, continuing with captioned video")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        final_filename = f"captioned_{timestamp}.mp4"
        final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
        if final_url:
            return CaptionResponse(
                status="success",
                project_id=request.project_id,
                captioned_video_url=final_url,
                caption_count=len(request.captions)
            )
        return CaptionResponse(
            status="error",
            project_id=request.project_id,
            error="Failed to upload captioned video"
        )
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return CaptionResponse(
            status="error",
            project_id=request.project_id,
            error=str(e)
        )
    finally:
        # Runs for early error returns and exceptions too, so scratch files
        # never accumulate under /tmp/styling.
        if work_dir:
            shutil.rmtree(work_dir, ignore_errors=True)


async def transcribe_and_caption(request: TranscriptionRequest):
    """
    Transcribe audio and add styled captions to video.

    If audio_url is provided, use that MP3. Otherwise, extract audio from video.
    Returns a ``TranscriptionResponse``; failures are reported as
    ``status="error"`` responses rather than raised. The scratch directory is
    removed on every exit path.
    """
    work_dir = None
    try:
        print(f"\n🎬 Starting transcription for project: {request.project_id}")
        work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
        os.makedirs(work_dir, exist_ok=True)
        video_path = os.path.join(work_dir, "input.mp4")
        download_file(request.video_url, video_path)

        audio_path = os.path.join(work_dir, "audio.mp3")
        if request.audio_url:
            print(f"📥 Downloading audio from: {request.audio_url}")
            download_file(request.audio_url, audio_path)
        else:
            print("🎵 No audio URL provided, extracting from video...")
            if not extract_audio_from_video(video_path, audio_path):
                return TranscriptionResponse(
                    status="error",
                    project_id=request.project_id,
                    error="Failed to extract audio from video"
                )

        print("📝 Transcribing audio...")
        try:
            captions = transcribe_audio_to_captions(audio_path, request.language)
        except Exception as e:
            return TranscriptionResponse(
                status="error",
                project_id=request.project_id,
                error=f"Transcription failed: {str(e)}"
            )
        if not captions:
            return TranscriptionResponse(
                status="error",
                project_id=request.project_id,
                error="No captions generated from audio"
            )

        style = request.caption_style
        font_path = get_font_path(style.font_family)
        if not font_path:
            return TranscriptionResponse(
                status="error",
                project_id=request.project_id,
                error=f"Caption font not found: {style.font_family}"
            )

        final_video, failure = _burn_captions(
            video_path,
            work_dir,
            captions,
            style,
            font_path,
            _caption_position(style),
        )
        if failure:
            return TranscriptionResponse(
                status="error",
                project_id=request.project_id,
                error=failure
            )

        if request.title_overlay:
            titled_path = os.path.join(work_dir, "titled.mp4")
            if create_text_overlay(final_video, titled_path, request.title_overlay):
                final_video = titled_path
            else:
                # Same best-effort policy as the other caption endpoints,
                # but no longer silent.
                print("⚠️ Title overlay failed, continuing with captioned video")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        final_filename = f"transcribed_{timestamp}.mp4"
        final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
        transcript = " ".join(c.text for c in captions)
        if final_url:
            return TranscriptionResponse(
                status="success",
                project_id=request.project_id,
                captioned_video_url=final_url,
                transcript=transcript,
                caption_count=len(captions)
            )
        return TranscriptionResponse(
            status="error",
            project_id=request.project_id,
            error="Failed to upload video"
        )
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return TranscriptionResponse(
            status="error",
            project_id=request.project_id,
            error=str(e)
        )
    finally:
        # Runs for early error returns and exceptions too, so scratch files
        # never accumulate under /tmp/styling.
        if work_dir:
            shutil.rmtree(work_dir, ignore_errors=True)


async def add_captions_from_srt(request: SrtCaptionRequest):
    """
    Add captions to video using SRT file from URL.

    Downloads the video and the SRT file, parses the SRT into captions, burns
    them in with ``request.caption_style``, optionally adds
    ``request.title_overlay``, uploads the result and returns a
    ``CaptionResponse``. Failures are reported as ``status="error"`` responses
    rather than raised. The scratch directory is removed on every exit path.
    """
    work_dir = None
    try:
        print(f"\n📝 Adding captions from SRT URL for project: {request.project_id}")
        work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
        os.makedirs(work_dir, exist_ok=True)
        video_path = os.path.join(work_dir, "input.mp4")
        download_file(request.video_url, video_path)

        srt_path = os.path.join(work_dir, "subtitles.srt")
        try:
            download_file(request.srt_url, srt_path)
            print(f"✅ Downloaded SRT from: {request.srt_url}")
        except Exception as e:
            return CaptionResponse(
                status="error",
                project_id=request.project_id,
                error=f"Failed to download SRT file from URL: {str(e)}"
            )

        # utf-8-sig drops a leading byte-order mark when present, so it does
        # not end up glued to the first cue; plain UTF-8 decodes unchanged.
        with open(srt_path, 'r', encoding='utf-8-sig') as f:
            srt_content = f.read()
        captions = parse_srt_file(srt_content)
        if not captions:
            return CaptionResponse(
                status="error",
                project_id=request.project_id,
                error="No valid captions found in SRT file"
            )
        print(f"✅ Parsed {len(captions)} captions from SRT")

        style = request.caption_style
        font_path = get_font_path(style.font_family)
        if not font_path:
            return CaptionResponse(
                status="error",
                project_id=request.project_id,
                error=f"Caption font not found: {style.font_family}"
            )

        margin = style.margin
        print(f"📏 Using margin: {margin} pixels")
        position = _caption_position(style)
        print(f"📍 Position string: {position}")

        final_video, failure = _burn_captions(
            video_path,
            work_dir,
            captions,
            style,
            font_path,
            position,
            log_suffix=f" at margin {margin} with outline={style.outline_width}",
        )
        if failure:
            return CaptionResponse(
                status="error",
                project_id=request.project_id,
                error=failure
            )

        if request.title_overlay:
            titled_path = os.path.join(work_dir, "titled.mp4")
            if create_text_overlay(final_video, titled_path, request.title_overlay):
                final_video = titled_path
            else:
                print("⚠️ Title overlay failed, continuing with captioned video")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        final_filename = f"captioned_from_srt_{timestamp}.mp4"
        final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
        if final_url:
            return CaptionResponse(
                status="success",
                project_id=request.project_id,
                captioned_video_url=final_url,
                caption_count=len(captions)
            )
        return CaptionResponse(
            status="error",
            project_id=request.project_id,
            error="Failed to upload captioned video"
        )
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return CaptionResponse(
            status="error",
            project_id=request.project_id,
            error=str(e)
        )
    finally:
        # Runs for early error returns and exceptions too, so scratch files
        # never accumulate under /tmp/styling.
        if work_dir:
            shutil.rmtree(work_dir, ignore_errors=True)
async def root():
    """Describe the service: name, version, feature flags and route listing.

    The payload also reports how many fonts (``FONTS``) and colors
    (``COLOR_MAP``) are currently loaded, and marks auto-transcription as
    available only when ``WHISPER_AVAILABLE`` is true.
    """
    color_count = len(COLOR_MAP)
    transcription_flag = "✅" if WHISPER_AVAILABLE else "❌"

    feature_flags = {
        "title_overlay": "✅",
        "manual_captions": "✅",
        "auto_transcription": transcription_flag,
        "srt_support": "✅",
        "enhanced_colors": f"✅ ({color_count} colors)",
        "text_outline": "✅",
        "background_boxes": "✅",
        "margin_control": "✅",
    }

    public_routes = {
        "style": "POST /api/style - Add title overlay",
        "caption": "POST /api/caption - Add manual captions",
        "caption_from_srt": "POST /api/caption-from-srt - Add captions from SRT URL",
        "transcribe": "POST /api/transcribe-and-caption - Auto-transcribe from audio",
        "fonts": "GET /fonts - List available fonts",
        "colors": "GET /debug/colors - List available colors",
        "health": "GET /health - Check status",
    }

    debug_routes = {
        "font_list": "GET /debug/font-list",
        "font_info": "GET /debug/font-info/{font_name}",
        "characters": "GET /debug/characters/{font_name}",
        "test_transcription": "POST /debug/test-transcription?audio_url=...&language=zh",
        "test_srt": "POST /debug/test-srt - Test SRT parsing",
        "colors": "GET /debug/colors - List all colors",
        "test_ass_colors": "POST /debug/test-ass-colors - Test ASS color rendering",
    }

    # Assemble in the same key order the response has always used.
    summary = {
        "name": "Text Styling API with Auto Caption",
        "version": "4.0.0",
    }
    summary["features"] = feature_flags
    summary["endpoints"] = public_routes
    summary["debug"] = debug_routes
    summary["fonts_loaded"] = len(FONTS)
    summary["colors_available"] = color_count
    return summary
async def ffmpeg_info():
    """Report the installed ffmpeg version and whether the ``ass`` filter exists.

    Returns:
        A dict with ``version`` (first line of ``ffmpeg -version``) and
        ``has_ass_filter`` (bool). When the ffmpeg executable cannot be
        found, ``version`` is ``None``, ``has_ass_filter`` is ``False`` and
        an ``error`` entry explains why.
    """
    try:
        version_run = subprocess.run(['ffmpeg', '-version'], capture_output=True, text=True)
        filters_run = subprocess.run(['ffmpeg', '-filters'], capture_output=True, text=True)
    except FileNotFoundError:
        return {
            "version": None,
            "has_ass_filter": False,
            "error": "ffmpeg executable not found"
        }

    # Each filter row is "<flags> <name> <io> <description>". Compare against
    # the name column: a plain substring test on the whole listing also
    # matches names such as "lowpass" or "bandpass" and is always true.
    filter_names = set()
    for line in filters_run.stdout.splitlines():
        columns = line.split()
        if len(columns) >= 2:
            filter_names.add(columns[1])

    return {
        "version": version_run.stdout.split('\n')[0],
        "has_ass_filter": "ass" in filter_names
    }
# =============================================
# RUN
# =============================================
if __name__ == "__main__":
    # Only when executed as a script: serve `app` with uvicorn, listening on
    # every network interface at port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)