Spaces:

yukee1992
/

text-styling

Paused

App Files Files Community

yukee1992 committed on Mar 18

Commit

47fb9d5

verified ·

1 Parent(s): 00588c9

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -194

app.py CHANGED Viewed

@@ -202,6 +202,8 @@ class TextStyle(BaseModel):
     position: str = "center"
     margin: int = 20
     padding: int = 10
 class CaptionStyle(BaseModel):
     font_family: str
@@ -212,6 +214,8 @@ class CaptionStyle(BaseModel):
     margin: int = 20
     padding: int = 8
     max_width: int = 0  # 0 = no max width, otherwise will wrap text
 class CaptionSegment(BaseModel):
     text: str
@@ -466,7 +470,7 @@ def wrap_text_for_ass(text: str, max_width: int, font_family: str, font_size: in
 def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work_dir: str, font_path: str) -> str:
-    """Create ASS subtitle file for captions with enhanced colors"""
     # Get actual font family name
     font_family_name = get_font_family_name(font_path)
@@ -474,21 +478,21 @@ def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work
     # Get font color and convert from RRGGBB to BBGGRR for ASS
     font_color_rgb = COLOR_MAP.get(style.color.lower(), "FFFFFF")
-    # Convert RGB to BGR (swap first two and last two characters)
     font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
     print(f"🎨 Font color: {style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
     # Parse background color with proper alpha calculation
     bg_parts = style.bg_color.split('@')
     bg_color_name = bg_parts[0].lower()
-    # Default opacity 0.5 if not specified
     bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
-    # Clamp opacity between 0 and 1
     bg_opacity = max(0, min(1, bg_opacity))
     bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
-    # Convert RGB to BGR for background
     bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
-    # Alpha: 0 = opaque, 255 = transparent
     bg_alpha = int((1 - bg_opacity) * 255)
     print(f"🎨 BG color: {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
@@ -498,15 +502,15 @@ def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work
         "left": 4, "center": 5, "right": 6,
         "top-left": 7, "top-center": 8, "top-right": 9
     }
-    alignment = pos_map.get(style.position, 2)  # Default to bottom-center
     # Calculate margins
     margin_l = style.margin if alignment in [1,4,7] else 0
     margin_r = style.margin if alignment in [3,6,9] else 0
     margin_v = style.margin
-    # Create ASS header with proper border settings
-    # BorderStyle=3 means opaque box, Outline controls box padding
     ass_header = f"""[Script Info]
 ; Script generated by Video Styling Space - Auto Caption
 ScriptType: v4.00+
@@ -517,7 +521,7 @@ WrapStyle: 1
 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
-Style: Default,{font_family_name},{style.font_size},&H00{font_color_bgr},&H000000FF,&H00000000,&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,3,{style.padding},0,{alignment},{margin_l},{margin_r},{margin_v},1
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
@@ -526,11 +530,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
     # Add caption events
     ass_events = []
     for i, caption in enumerate(captions):
-        # Format timestamps
         start = format_ass_time(caption.start_time)
         end = format_ass_time(caption.end_time)
-        # Handle text wrapping if max_width specified
         text = caption.text
         if style.max_width > 0:
             text = wrap_text_for_ass(text, style.max_width, font_family_name, style.font_size)
@@ -542,17 +544,13 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
     with open(ass_file, 'w', encoding='utf-8') as f:
         f.write(ass_header + "\n".join(ass_events))
-    # Debug: Print the ASS file content
     print("\n=== ASS FILE DEBUG ===")
     with open(ass_file, 'r', encoding='utf-8') as f:
         print(f.read())
     print("=== END ASS DEBUG ===\n")
     print(f"📝 Created caption ASS file with {len(captions)} captions")
-    print(f"📝 Style line: BorderStyle=3, Outline={style.padding}, BackColor=&H{bg_alpha:02X}{bg_color_bgr}")
-    # For captions, we need to use the ASS method (can't use drawtext for multiple timed captions)
-    # But we've improved the ASS formatting for better background rendering
     return ass_file
 # =============================================
@@ -576,7 +574,6 @@ def parse_srt_file(srt_content: str) -> List[CaptionSegment]:
         lines = block.strip().split('\n')
         if len(lines) >= 3:
             # Skip the index number (first line)
-            # Parse timestamp line (second line)
             timestamp_line = lines[1]
             # Parse timestamp format: 00:00:01,234 --> 00:00:04,567
@@ -603,10 +600,7 @@ def srt_time_to_seconds(time_str: str) -> float:
     Convert SRT timestamp to seconds
     Format: 00:00:01,234 or 00:00:01.234
     """
-    # Replace comma with dot for millisecond parsing
     time_str = time_str.replace(',', '.')
-    # Split into hours, minutes, seconds
     parts = time_str.split(':')
     if len(parts) == 3:
         hours = int(parts[0])
@@ -620,7 +614,7 @@ def srt_time_to_seconds(time_str: str) -> float:
 # =============================================
 def create_text_overlay(input_video, output_video, text_style):
-    """Add text overlay using ASS subtitles with enhanced colors"""
     font_path = get_font_path(text_style.font_family)
     if not font_path:
         print(f"⚠️ Font not found: {text_style.font_family}")
@@ -646,19 +640,21 @@ def create_text_overlay(input_video, output_video, text_style):
     # Get font color and convert from RRGGBB to BBGGRR for ASS
     font_color_rgb = COLOR_MAP.get(text_style.color.lower(), "FFFFFF")
-    # Convert RGB to BGR (swap first two and last two characters)
     font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
     print(f"🎨 Font color: {text_style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
     # Parse background color
     bg_parts = text_style.bg_color.split('@')
     bg_color_name = bg_parts[0].lower()
     bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
     bg_opacity = max(0, min(1, bg_opacity))
     bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
-    # Convert RGB to BGR for background
     bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
-    # Alpha: 0 = opaque, 255 = transparent
     bg_alpha = int((1 - bg_opacity) * 255)
     print(f"🎨 BG color: {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
@@ -686,12 +682,10 @@ def create_text_overlay(input_video, output_video, text_style):
     fc_file = os.path.join(work_dir, "fonts.conf")
     with open(fc_file, 'w') as f:
         f.write(fc_config)
-    # Set environment variable for fontconfig
     os.environ['FONTCONFIG_FILE'] = fc_file
-    # Create ASS file content with solid rectangle background
-    # BorderStyle=3 gives solid box, Outline controls padding
     ass_content = f"""[Script Info]
 ; Script generated by Video Styling Space
 ScriptType: v4.00+
@@ -701,7 +695,7 @@ ScaledBorderAndShadow: yes
 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
-Style: Default,{font_family_name},{text_style.font_size},&H00{font_color_bgr},&H000000FF,&H00000000,&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,3,{text_style.padding},0,{alignment},{margin_l},{margin_r},{margin_v},1
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
@@ -718,56 +712,26 @@ Dialogue: 0,0:00:00.00,0:00:10.00,Default,,0,0,0,,{text_style.text}"""
     print("=== END ASS DEBUG ===\n")
     print(f"📝 Created ASS subtitle file with font family: {font_family_name}")
-    print(f"📝 Style line: BorderStyle=3, Outline={text_style.padding}, BackColor=&H{bg_alpha:02X}{bg_color_bgr}")
-    # Try drawtext method first (more reliable for backgrounds)
-    print("🎬 Using drawtext method for reliable backgrounds...")
-    drawtext_pos = {
-        "bottom-left": "x=20:y=h-th-20",
-        "bottom-center": "x=(w-tw)/2:y=h-th-20",
-        "bottom-right": "x=w-tw-20:y=h-th-20",
-        "center": "x=(w-tw)/2:y=(h-th)/2",
-        "top-left": "x=20:y=20",
-        "top-center": "x=(w-tw)/2:y=20",
-        "top-right": "x=w-tw-20:y=20"
-    }
-    position = drawtext_pos.get(text_style.position, "x=(w-tw)/2:y=(h-th)/2")
-    # Use the actual color name for drawtext (without round parameter)
-    drawtext_cmd = [
         'ffmpeg', '-y',
         '-i', input_video,
-        '-vf', f"drawtext=text='{text_style.text}':fontfile={font_path}:fontsize={text_style.font_size}:fontcolor={text_style.color}:{position}:box=1:boxcolor={bg_color_name}@{bg_opacity}:boxborderw={text_style.padding}",
         '-c:a', 'copy',
         output_video
     ]
-    print(f"🎬 Running drawtext command...")
-    result = subprocess.run(drawtext_cmd, capture_output=True, text=True)
     if result.returncode == 0:
-        print(f"✅ Drawtext method succeeded with solid background")
         return True
     else:
-        print(f"❌ Drawtext failed: {result.stderr}")
-        print("🔄 Falling back to ASS method...")
-        # Try ASS method as fallback
-        cmd = [
-            'ffmpeg', '-y',
-            '-i', input_video,
-            '-vf', f"ass={ass_file}",
-            '-c:a', 'copy',
-            output_video
-        ]
-        result2 = subprocess.run(cmd, capture_output=True, text=True)
-        if result2.returncode == 0:
-            print(f"✅ ASS method succeeded")
-            return True
-        else:
-            print(f"❌ ASS method also failed: {result2.stderr}")
-            return False
 # =============================================
 # DEBUG ENDPOINTS
@@ -800,21 +764,16 @@ async def debug_characters(font_name: str):
         images = []
         for label, text in test_texts:
-            # Create image
             img = Image.new('RGB', (1200, 400), color='white')
             d = ImageDraw.Draw(img)
             try:
-                # Try to load font with size 24
                 font = ImageFont.truetype(font_path, 24)
-                # Draw label
                 d.text((10, 10), f"{label}:", fill='black', font=font)
-                # Draw text (wrap if too long)
                 d.text((10, 50), str(text)[:200], fill='black', font=font)
             except Exception as e:
                 d.text((10, 10), f"Error: {str(e)}", fill='red', font=ImageFont.load_default())
-            # Convert to base64
             buffered = BytesIO()
             img.save(buffered, format="PNG")
             img_base64 = base64.b64encode(buffered.getvalue()).decode()
@@ -852,7 +811,6 @@ async def font_info(font_name: str):
             "fonttools_available": FONTTOOLS_AVAILABLE
         }
-        # Try to get basic info with PIL
         if PIL_AVAILABLE:
             try:
                 font = ImageFont.truetype(font_path, 20)
@@ -861,13 +819,11 @@ async def font_info(font_name: str):
                 info["pil_loads"] = False
                 info["pil_error"] = str(e)
-        # Try to get detailed info with fontTools
         if FONTTOOLS_AVAILABLE:
             try:
                 from fontTools import ttLib
                 font = ttLib.TTFont(font_path)
-                # Get font name
                 name_records = {}
                 for record in font['name'].names:
                     try:
@@ -884,12 +840,11 @@ async def font_info(font_name: str):
                 info["names"] = name_records
-                # Check for Chinese glyphs
                 cmap = font.getBestCmap()
                 chinese_ranges = [
-                    (0x4E00, 0x9FFF),  # CJK Unified Ideographs
-                    (0x3400, 0x4DBF),  # Extension A
-                    (0x20000, 0x2A6DF), # Extension B
                 ]
                 has_chinese = False
@@ -943,7 +898,6 @@ async def test_chars(font_name: str):
         import base64
         from io import BytesIO
-        # Test different character sets
         test_strings = [
             ("English", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
             ("Numbers", "0123456789"),
@@ -954,7 +908,6 @@ async def test_chars(font_name: str):
         results = []
         for label, text in test_strings:
-            # Create image
             img = Image.new('RGB', (800, 200), color='white')
             d = ImageDraw.Draw(img)
@@ -962,19 +915,17 @@ async def test_chars(font_name: str):
                 font = ImageFont.truetype(font_path, 36)
                 d.text((10, 50), f"{label}: {text}", fill='black', font=font)
-                # Convert to base64
                 buffered = BytesIO()
                 img.save(buffered, format="PNG")
                 img_base64 = base64.b64encode(buffered.getvalue()).decode()
                 results.append({
                     "label": label,
                     "text": text,
-                    "image": img_base64[:100] + "..."  # Truncate for display
                 })
             except Exception as e:
                 results.append({"label": label, "error": str(e)})
-        # Also return font info
         import subprocess
         fc_list = subprocess.run(['fc-list', font_path], capture_output=True, text=True)
@@ -1002,10 +953,8 @@ async def test_transcription(audio_url: str, language: str = "zh"):
         audio_path = os.path.join(work_dir, "test_audio.mp3")
         download_file(audio_url, audio_path)
-        # Test transcription
         captions = transcribe_audio_to_captions(audio_path, language)
-        # Format for display
         result = {
             "status": "success",
             "caption_count": len(captions),
@@ -1015,7 +964,7 @@ async def test_transcription(audio_url: str, language: str = "zh"):
                     "start": c.start_time,
                     "end": c.end_time
                 }
-                for c in captions[:10]  # Show first 10
             ],
             "full_transcript": " ".join([c.text for c in captions])[:500] + "..." if captions else ""
         }
@@ -1029,17 +978,14 @@ async def test_transcription(audio_url: str, language: str = "zh"):
 async def test_srt_parsing(project_id: str, srt_filename: str):
     """Test parsing an SRT file from your dataset"""
     try:
-        # Download SRT file
         srt_url = f"https://huggingface.co/datasets/{VIDEO_DATASET}/resolve/main/data/projects/{project_id}/subtitles/{srt_filename}"
         response = requests.get(srt_url)
         if response.status_code != 200:
             return {"error": f"Failed to download SRT: {response.status_code}"}
-        # Parse
         captions = parse_srt_file(response.text)
-        # Return preview
         return {
             "status": "success",
             "filename": srt_filename,
@@ -1051,7 +997,7 @@ async def test_srt_parsing(project_id: str, srt_filename: str):
                     "start": c.start_time,
                     "end": c.end_time
                 }
-                for i, c in enumerate(captions[:5])  # Show first 5
             ]
         }
     except Exception as e:
@@ -1077,7 +1023,6 @@ async def test_ass_colors():
         work_dir = "/tmp/ass_test"
         os.makedirs(work_dir, exist_ok=True)
-        # Create a test ASS file with various colors
         ass_content = """[Script Info]
 ; Script generated for color testing
 ScriptType: v4.00+
@@ -1097,7 +1042,6 @@ Dialogue: 0,0:00:05.00,0:00:10.00,Default,,0,0,0,,Dark Purple (should be indigo)
         with open(ass_file, 'w', encoding='utf-8') as f:
             f.write(ass_content)
-        # Read it back to verify
         with open(ass_file, 'r', encoding='utf-8') as f:
             content = f.read()
@@ -1138,13 +1082,11 @@ async def style_video(request: StylingRequest):
         work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
         os.makedirs(work_dir, exist_ok=True)
-        # Download video
         video_path = os.path.join(work_dir, "input.mp4")
         download_file(request.video_url, video_path)
         current_video = video_path
-        # Add title overlay if provided
         if request.title_overlay:
             titled_path = os.path.join(work_dir, "titled.mp4")
             if create_text_overlay(current_video, titled_path, request.title_overlay):
@@ -1156,12 +1098,10 @@ async def style_video(request: StylingRequest):
                     error="Failed to add text overlay"
                 )
-        # Upload styled video
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         styled_filename = f"styled_{timestamp}.mp4"
         styled_url = upload_to_dataset(current_video, request.project_id, styled_filename, "videos")
-        # Cleanup
         shutil.rmtree(work_dir, ignore_errors=True)
         if styled_url:
@@ -1194,13 +1134,11 @@ async def add_captions_to_video(request: CaptionRequest):
         work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
         os.makedirs(work_dir, exist_ok=True)
-        # Download video
         video_path = os.path.join(work_dir, "input.mp4")
         download_file(request.video_url, video_path)
         current_video = video_path
-        # Get font path for captions
         font_path = get_font_path(request.caption_style.font_family)
         if not font_path:
             return CaptionResponse(
@@ -1209,7 +1147,6 @@ async def add_captions_to_video(request: CaptionRequest):
                 error=f"Caption font not found: {request.caption_style.font_family}"
             )
-        # Create fontconfig config
         font_dir = os.path.dirname(font_path)
         fc_config = f"""<?xml version="1.0"?>
 <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
@@ -1222,7 +1159,6 @@ async def add_captions_to_video(request: CaptionRequest):
             f.write(fc_config)
         os.environ['FONTCONFIG_FILE'] = fc_file
-        # Create caption ASS file
         ass_file = create_caption_ass(
             request.captions,
             request.caption_style,
@@ -1230,7 +1166,6 @@ async def add_captions_to_video(request: CaptionRequest):
             font_path
         )
-        # Apply captions to video
         captioned_path = os.path.join(work_dir, "captioned.mp4")
         cmd = [
@@ -1254,7 +1189,6 @@ async def add_captions_to_video(request: CaptionRequest):
         current_video = captioned_path
-        # Add title overlay if provided
         if request.title_overlay:
             titled_path = os.path.join(work_dir, "titled.mp4")
             if create_text_overlay(current_video, titled_path, request.title_overlay):
@@ -1262,12 +1196,10 @@ async def add_captions_to_video(request: CaptionRequest):
             else:
                 print("⚠️ Title overlay failed, continuing with captioned video")
-        # Upload final video
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         final_filename = f"captioned_{timestamp}.mp4"
         final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
-        # Cleanup
         shutil.rmtree(work_dir, ignore_errors=True)
         if final_url:
@@ -1306,19 +1238,15 @@ async def transcribe_and_caption(request: TranscriptionRequest):
         work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
         os.makedirs(work_dir, exist_ok=True)
-        # Download video
         video_path = os.path.join(work_dir, "input.mp4")
         download_file(request.video_url, video_path)
-        # Get audio for transcription
         audio_path = os.path.join(work_dir, "audio.mp3")
         if request.audio_url:
-            # Download provided MP3
             print(f"📥 Downloading audio from: {request.audio_url}")
             download_file(request.audio_url, audio_path)
         else:
-            # Extract audio from video
             print("🎵 No audio URL provided, extracting from video...")
             if not extract_audio_from_video(video_path, audio_path):
                 return TranscriptionResponse(
@@ -1327,7 +1255,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
                     error="Failed to extract audio from video"
                 )
-        # Transcribe audio to captions
         print("📝 Transcribing audio...")
         try:
             captions = transcribe_audio_to_captions(audio_path, request.language)
@@ -1345,7 +1272,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
                 error="No captions generated from audio"
             )
-        # Get font path for captions
         font_path = get_font_path(request.caption_style.font_family)
         if not font_path:
             return TranscriptionResponse(
@@ -1354,7 +1280,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
                 error=f"Caption font not found: {request.caption_style.font_family}"
             )
-        # Create fontconfig
         font_dir = os.path.dirname(font_path)
         fc_config = f"""<?xml version="1.0"?>
 <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
@@ -1367,7 +1292,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
             f.write(fc_config)
         os.environ['FONTCONFIG_FILE'] = fc_file
-        # Create caption ASS file
         ass_file = create_caption_ass(
             captions,
             request.caption_style,
@@ -1375,7 +1299,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
             font_path
         )
-        # Apply captions to video
         captioned_path = os.path.join(work_dir, "captioned.mp4")
         cmd = [
@@ -1399,21 +1322,17 @@ async def transcribe_and_caption(request: TranscriptionRequest):
         current_video = captioned_path
-        # Add title overlay if provided
         if request.title_overlay:
             titled_path = os.path.join(work_dir, "titled.mp4")
             if create_text_overlay(current_video, titled_path, request.title_overlay):
                 current_video = titled_path
-        # Upload final video
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         final_filename = f"transcribed_{timestamp}.mp4"
         final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
-        # Get plain transcript
         transcript = " ".join([c.text for c in captions])
-        # Cleanup
         shutil.rmtree(work_dir, ignore_errors=True)
         if final_url:
@@ -1444,7 +1363,7 @@ async def transcribe_and_caption(request: TranscriptionRequest):
 @app.post("/api/caption-from-srt", response_model=CaptionResponse)
 async def add_captions_from_srt(request: SrtCaptionRequest):
     """
-    Add captions to video using SRT file from URL - Using drawtext for each caption
     """
     try:
         print(f"\n📝 Adding captions from SRT URL for project: {request.project_id}")
@@ -1452,11 +1371,9 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
         work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
         os.makedirs(work_dir, exist_ok=True)
-        # Download video
         video_path = os.path.join(work_dir, "input.mp4")
         download_file(request.video_url, video_path)
-        # Download SRT file from URL
         srt_path = os.path.join(work_dir, "subtitles.srt")
         try:
             download_file(request.srt_url, srt_path)
@@ -1468,7 +1385,6 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
                 error=f"Failed to download SRT file from URL: {str(e)}"
             )
-        # Parse SRT file
         with open(srt_path, 'r', encoding='utf-8') as f:
             srt_content = f.read()
@@ -1483,7 +1399,6 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
         print(f"✅ Parsed {len(captions)} captions from SRT")
-        # Get font path for captions
         font_path = get_font_path(request.caption_style.font_family)
         if not font_path:
             return CaptionResponse(
@@ -1492,7 +1407,6 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
                 error=f"Caption font not found: {request.caption_style.font_family}"
             )
-        # Create fontconfig
         font_dir = os.path.dirname(font_path)
         fc_config = f"""<?xml version="1.0"?>
 <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
@@ -1505,85 +1419,47 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
             f.write(fc_config)
         os.environ['FONTCONFIG_FILE'] = fc_file
-        # Get margin value from request
-        margin = request.caption_style.margin
-        print(f"📏 Using margin: {margin} pixels")
-        # Map position to drawtext position with margin
-        drawtext_pos = {
-            "bottom-left": f"x={margin}:y=h-th-{margin}",
-            "bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
-            "bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
-            "center": "x=(w-tw)/2:y=(h-th)/2",
-            "left": f"x={margin}:y=(h-th)/2",
-            "right": f"x=w-tw-{margin}:y=(h-th)/2",
-            "top-left": f"x={margin}:y={margin}",
-            "top-center": f"x=(w-tw)/2:y={margin}",
-            "top-right": f"x=w-tw-{margin}:y={margin}"
-        }
-        position = drawtext_pos.get(request.caption_style.position, f"x=(w-tw)/2:y=h-th-{margin}")
-        print(f"📍 Position string: {position}")
-        # Process each caption with its own drawtext filter
-        current_input = video_path
-        for i, caption in enumerate(captions):
-            # Create a temporary file for this step
-            temp_output = os.path.join(work_dir, f"step_{i:03d}.mp4")
-            # Build drawtext filter for this caption with margin (no round parameter)
-            drawtext_filter = (
-                f"drawtext=text='{caption.text}':"
-                f"fontfile={font_path}:"
-                f"fontsize={request.caption_style.font_size}:"
-                f"fontcolor={request.caption_style.color}:"
-                f"{position}:"
-                f"box=1:"
-                f"boxcolor={request.caption_style.bg_color}:"
-                f"boxborderw={request.caption_style.padding}:"
-                f"enable='between(t,{caption.start_time},{caption.end_time})'"
             )
-            # Apply filter to current input
-            cmd = [
-                'ffmpeg', '-y',
-                '-i', current_input,
-                '-vf', drawtext_filter,
-                '-c:a', 'copy',
-                temp_output
-            ]
-            print(f"🎬 Adding caption {i+1}/{len(captions)}: '{caption.text}' at margin {margin}")
-            result = subprocess.run(cmd, capture_output=True, text=True)
-            if result.returncode != 0:
-                print(f"❌ FFmpeg error at caption {i+1}: {result.stderr}")
-                return CaptionResponse(
-                    status="error",
-                    project_id=request.project_id,
-                    error=f"Failed to add caption {i+1}: {result.stderr[:200]}"
-                )
-            # Update for next iteration
-            current_input = temp_output
-        # Final output is the last temp file
-        final_video = current_input
-        # Add title overlay if provided
         if request.title_overlay:
             titled_path = os.path.join(work_dir, "titled.mp4")
-            if create_text_overlay(final_video, titled_path, request.title_overlay):
-                final_video = titled_path
             else:
                 print("⚠️ Title overlay failed, continuing with captioned video")
-        # Upload final video
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         final_filename = f"captioned_from_srt_{timestamp}.mp4"
-        final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
-        # Cleanup
         shutil.rmtree(work_dir, ignore_errors=True)
         if final_url:
@@ -1614,13 +1490,14 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
 async def root():
     return {
         "name": "Text Styling API with Auto Caption",
-        "version": "3.2.0",
         "features": {
             "title_overlay": "✅",
             "manual_captions": "✅",
             "auto_transcription": "✅" if WHISPER_AVAILABLE else "❌",
             "srt_support": "✅",
-            "enhanced_colors": f"✅ ({len(COLOR_MAP)} colors)"
         },
         "endpoints": {
             "style": "POST /api/style - Add title overlay",

     position: str = "center"
     margin: int = 20
     padding: int = 10
+    outline_width: int = 0  # Width of text outline (0 = no outline)
+    outline_color: str = "black"  # Color of the outline
 class CaptionStyle(BaseModel):
     font_family: str
     margin: int = 20
     padding: int = 8
     max_width: int = 0  # 0 = no max width, otherwise will wrap text
+    outline_width: int = 0  # Width of text outline (0 = no outline)
+    outline_color: str = "black"  # Color of the outline
 class CaptionSegment(BaseModel):
     text: str
 def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work_dir: str, font_path: str) -> str:
+    """Create ASS subtitle file for captions with enhanced colors and outlines"""
     # Get actual font family name
     font_family_name = get_font_family_name(font_path)
     # Get font color and convert from RRGGBB to BBGGRR for ASS
     font_color_rgb = COLOR_MAP.get(style.color.lower(), "FFFFFF")
     font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
     print(f"🎨 Font color: {style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
+    # Get outline color and convert to BGR
+    outline_color_rgb = COLOR_MAP.get(style.outline_color.lower(), "000000")
+    outline_color_bgr = outline_color_rgb[4:6] + outline_color_rgb[2:4] + outline_color_rgb[0:2]
+    print(f"✏️ Outline color: {style.outline_color} -> RGB={outline_color_rgb} -> BGR={outline_color_bgr}, Width={style.outline_width}")
     # Parse background color with proper alpha calculation
     bg_parts = style.bg_color.split('@')
     bg_color_name = bg_parts[0].lower()
     bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
     bg_opacity = max(0, min(1, bg_opacity))
     bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
     bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
     bg_alpha = int((1 - bg_opacity) * 255)
     print(f"🎨 BG color: {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
         "left": 4, "center": 5, "right": 6,
         "top-left": 7, "top-center": 8, "top-right": 9
     }
+    alignment = pos_map.get(style.position, 2)
     # Calculate margins
     margin_l = style.margin if alignment in [1,4,7] else 0
     margin_r = style.margin if alignment in [3,6,9] else 0
     margin_v = style.margin
+    # Create ASS header with proper border settings and outline
+    # BorderStyle=1 with outline creates text border
     ass_header = f"""[Script Info]
 ; Script generated by Video Styling Space - Auto Caption
 ScriptType: v4.00+
 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+Style: Default,{font_family_name},{style.font_size},&H00{font_color_bgr},&H000000FF,&H{outline_color_bgr},&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,1,{style.outline_width},0,{alignment},{margin_l},{margin_r},{margin_v},1
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
     # Add caption events
     ass_events = []
     for i, caption in enumerate(captions):
         start = format_ass_time(caption.start_time)
         end = format_ass_time(caption.end_time)
         text = caption.text
         if style.max_width > 0:
             text = wrap_text_for_ass(text, style.max_width, font_family_name, style.font_size)
     with open(ass_file, 'w', encoding='utf-8') as f:
         f.write(ass_header + "\n".join(ass_events))
     print("\n=== ASS FILE DEBUG ===")
     with open(ass_file, 'r', encoding='utf-8') as f:
         print(f.read())
     print("=== END ASS DEBUG ===\n")
     print(f"📝 Created caption ASS file with {len(captions)} captions")
+    print(f"📝 Style line: BorderStyle=1, Outline={style.outline_width}, OutlineColor=&H{outline_color_bgr}")
     return ass_file
 # =============================================
         lines = block.strip().split('\n')
         if len(lines) >= 3:
             # Skip the index number (first line)
             timestamp_line = lines[1]
             # Parse timestamp format: 00:00:01,234 --> 00:00:04,567
     Convert SRT timestamp to seconds
     Format: 00:00:01,234 or 00:00:01.234
     """
     time_str = time_str.replace(',', '.')
     parts = time_str.split(':')
     if len(parts) == 3:
         hours = int(parts[0])
 # =============================================
 def create_text_overlay(input_video, output_video, text_style):
+    """Add text overlay using ASS subtitles with enhanced colors and outlines"""
     font_path = get_font_path(text_style.font_family)
     if not font_path:
         print(f"⚠️ Font not found: {text_style.font_family}")
     # Get font color and convert from RRGGBB to BBGGRR for ASS
     font_color_rgb = COLOR_MAP.get(text_style.color.lower(), "FFFFFF")
     font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
     print(f"🎨 Font color: {text_style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
+    # Get outline color and convert to BGR
+    outline_color_rgb = COLOR_MAP.get(text_style.outline_color.lower(), "000000")
+    outline_color_bgr = outline_color_rgb[4:6] + outline_color_rgb[2:4] + outline_color_rgb[0:2]
+    print(f"✏️ Outline color: {text_style.outline_color} -> RGB={outline_color_rgb} -> BGR={outline_color_bgr}, Width={text_style.outline_width}")
     # Parse background color
     bg_parts = text_style.bg_color.split('@')
     bg_color_name = bg_parts[0].lower()
     bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
     bg_opacity = max(0, min(1, bg_opacity))
     bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
     bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
     bg_alpha = int((1 - bg_opacity) * 255)
     print(f"🎨 BG color: {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
     fc_file = os.path.join(work_dir, "fonts.conf")
     with open(fc_file, 'w') as f:
         f.write(fc_config)
     os.environ['FONTCONFIG_FILE'] = fc_file
+    # Create ASS file content with solid rectangle background and outline
+    # BorderStyle=1 with outline creates text border
     ass_content = f"""[Script Info]
 ; Script generated by Video Styling Space
 ScriptType: v4.00+
 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+Style: Default,{font_family_name},{text_style.font_size},&H00{font_color_bgr},&H000000FF,&H{outline_color_bgr},&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,1,{text_style.outline_width},0,{alignment},{margin_l},{margin_r},{margin_v},1
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
     print("=== END ASS DEBUG ===\n")
     print(f"📝 Created ASS subtitle file with font family: {font_family_name}")
+    print(f"📝 Style line: BorderStyle=1, Outline={text_style.outline_width}, OutlineColor=&H{outline_color_bgr}")
+    # Run FFmpeg with ASS filter
+    cmd = [
         'ffmpeg', '-y',
         '-i', input_video,
+        '-vf', f"ass={ass_file}",
         '-c:a', 'copy',
         output_video
     ]
+    print(f"🎬 Running FFmpeg with ASS filter...")
+    result = subprocess.run(cmd, capture_output=True, text=True)
     if result.returncode == 0:
+        print(f"✅ ASS method succeeded with outline")
         return True
     else:
+        print(f"❌ ASS method failed: {result.stderr}")
+        return False
 # =============================================
 # DEBUG ENDPOINTS
         images = []
         for label, text in test_texts:
             img = Image.new('RGB', (1200, 400), color='white')
             d = ImageDraw.Draw(img)
             try:
                 font = ImageFont.truetype(font_path, 24)
                 d.text((10, 10), f"{label}:", fill='black', font=font)
                 d.text((10, 50), str(text)[:200], fill='black', font=font)
             except Exception as e:
                 d.text((10, 10), f"Error: {str(e)}", fill='red', font=ImageFont.load_default())
             buffered = BytesIO()
             img.save(buffered, format="PNG")
             img_base64 = base64.b64encode(buffered.getvalue()).decode()
             "fonttools_available": FONTTOOLS_AVAILABLE
         }
         if PIL_AVAILABLE:
             try:
                 font = ImageFont.truetype(font_path, 20)
                 info["pil_loads"] = False
                 info["pil_error"] = str(e)
         if FONTTOOLS_AVAILABLE:
             try:
                 from fontTools import ttLib
                 font = ttLib.TTFont(font_path)
                 name_records = {}
                 for record in font['name'].names:
                     try:
                 info["names"] = name_records
                 cmap = font.getBestCmap()
                 chinese_ranges = [
+                    (0x4E00, 0x9FFF),
+                    (0x3400, 0x4DBF),
+                    (0x20000, 0x2A6DF),
                 ]
                 has_chinese = False
         import base64
         from io import BytesIO
         test_strings = [
             ("English", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
             ("Numbers", "0123456789"),
         results = []
         for label, text in test_strings:
             img = Image.new('RGB', (800, 200), color='white')
             d = ImageDraw.Draw(img)
                 font = ImageFont.truetype(font_path, 36)
                 d.text((10, 50), f"{label}: {text}", fill='black', font=font)
                 buffered = BytesIO()
                 img.save(buffered, format="PNG")
                 img_base64 = base64.b64encode(buffered.getvalue()).decode()
                 results.append({
                     "label": label,
                     "text": text,
+                    "image": img_base64[:100] + "..."
                 })
             except Exception as e:
                 results.append({"label": label, "error": str(e)})
         import subprocess
         fc_list = subprocess.run(['fc-list', font_path], capture_output=True, text=True)
         audio_path = os.path.join(work_dir, "test_audio.mp3")
         download_file(audio_url, audio_path)
         captions = transcribe_audio_to_captions(audio_path, language)
         result = {
             "status": "success",
             "caption_count": len(captions),
                     "start": c.start_time,
                     "end": c.end_time
                 }
+                for c in captions[:10]
             ],
             "full_transcript": " ".join([c.text for c in captions])[:500] + "..." if captions else ""
         }
 async def test_srt_parsing(project_id: str, srt_filename: str):
     """Test parsing an SRT file from your dataset"""
     try:
         srt_url = f"https://huggingface.co/datasets/{VIDEO_DATASET}/resolve/main/data/projects/{project_id}/subtitles/{srt_filename}"
         response = requests.get(srt_url)
         if response.status_code != 200:
             return {"error": f"Failed to download SRT: {response.status_code}"}
         captions = parse_srt_file(response.text)
         return {
             "status": "success",
             "filename": srt_filename,
                     "start": c.start_time,
                     "end": c.end_time
                 }
+                for i, c in enumerate(captions[:5])
             ]
         }
     except Exception as e:
         work_dir = "/tmp/ass_test"
         os.makedirs(work_dir, exist_ok=True)
         ass_content = """[Script Info]
 ; Script generated for color testing
 ScriptType: v4.00+
         with open(ass_file, 'w', encoding='utf-8') as f:
             f.write(ass_content)
         with open(ass_file, 'r', encoding='utf-8') as f:
             content = f.read()
         work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
         os.makedirs(work_dir, exist_ok=True)
         video_path = os.path.join(work_dir, "input.mp4")
         download_file(request.video_url, video_path)
         current_video = video_path
         if request.title_overlay:
             titled_path = os.path.join(work_dir, "titled.mp4")
             if create_text_overlay(current_video, titled_path, request.title_overlay):
                     error="Failed to add text overlay"
                 )
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         styled_filename = f"styled_{timestamp}.mp4"
         styled_url = upload_to_dataset(current_video, request.project_id, styled_filename, "videos")
         shutil.rmtree(work_dir, ignore_errors=True)
         if styled_url:
         work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
         os.makedirs(work_dir, exist_ok=True)
         video_path = os.path.join(work_dir, "input.mp4")
         download_file(request.video_url, video_path)
         current_video = video_path
         font_path = get_font_path(request.caption_style.font_family)
         if not font_path:
             return CaptionResponse(
                 error=f"Caption font not found: {request.caption_style.font_family}"
             )
         font_dir = os.path.dirname(font_path)
         fc_config = f"""<?xml version="1.0"?>
 <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
             f.write(fc_config)
         os.environ['FONTCONFIG_FILE'] = fc_file
         ass_file = create_caption_ass(
             request.captions,
             request.caption_style,
             font_path
         )
         captioned_path = os.path.join(work_dir, "captioned.mp4")
         cmd = [
         current_video = captioned_path
         if request.title_overlay:
             titled_path = os.path.join(work_dir, "titled.mp4")
             if create_text_overlay(current_video, titled_path, request.title_overlay):
             else:
                 print("⚠️ Title overlay failed, continuing with captioned video")
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         final_filename = f"captioned_{timestamp}.mp4"
         final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
         shutil.rmtree(work_dir, ignore_errors=True)
         if final_url:
         work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
         os.makedirs(work_dir, exist_ok=True)
         video_path = os.path.join(work_dir, "input.mp4")
         download_file(request.video_url, video_path)
         audio_path = os.path.join(work_dir, "audio.mp3")
         if request.audio_url:
             print(f"📥 Downloading audio from: {request.audio_url}")
             download_file(request.audio_url, audio_path)
         else:
             print("🎵 No audio URL provided, extracting from video...")
             if not extract_audio_from_video(video_path, audio_path):
                 return TranscriptionResponse(
                     error="Failed to extract audio from video"
                 )
         print("📝 Transcribing audio...")
         try:
             captions = transcribe_audio_to_captions(audio_path, request.language)
                 error="No captions generated from audio"
             )
         font_path = get_font_path(request.caption_style.font_family)
         if not font_path:
             return TranscriptionResponse(
                 error=f"Caption font not found: {request.caption_style.font_family}"
             )
         font_dir = os.path.dirname(font_path)
         fc_config = f"""<?xml version="1.0"?>
 <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
             f.write(fc_config)
         os.environ['FONTCONFIG_FILE'] = fc_file
         ass_file = create_caption_ass(
             captions,
             request.caption_style,
             font_path
         )
         captioned_path = os.path.join(work_dir, "captioned.mp4")
         cmd = [
         current_video = captioned_path
         if request.title_overlay:
             titled_path = os.path.join(work_dir, "titled.mp4")
             if create_text_overlay(current_video, titled_path, request.title_overlay):
                 current_video = titled_path
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         final_filename = f"transcribed_{timestamp}.mp4"
         final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
         transcript = " ".join([c.text for c in captions])
         shutil.rmtree(work_dir, ignore_errors=True)
         if final_url:
 @app.post("/api/caption-from-srt", response_model=CaptionResponse)
 async def add_captions_from_srt(request: SrtCaptionRequest):
     """
+    Add captions to video using SRT file from URL
     """
     try:
         print(f"\n📝 Adding captions from SRT URL for project: {request.project_id}")
         work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
         os.makedirs(work_dir, exist_ok=True)
         video_path = os.path.join(work_dir, "input.mp4")
         download_file(request.video_url, video_path)
         srt_path = os.path.join(work_dir, "subtitles.srt")
         try:
             download_file(request.srt_url, srt_path)
                 error=f"Failed to download SRT file from URL: {str(e)}"
             )
         with open(srt_path, 'r', encoding='utf-8') as f:
             srt_content = f.read()
         print(f"✅ Parsed {len(captions)} captions from SRT")
         font_path = get_font_path(request.caption_style.font_family)
         if not font_path:
             return CaptionResponse(
                 error=f"Caption font not found: {request.caption_style.font_family}"
             )
         font_dir = os.path.dirname(font_path)
         fc_config = f"""<?xml version="1.0"?>
 <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
             f.write(fc_config)
         os.environ['FONTCONFIG_FILE'] = fc_file
+        ass_file = create_caption_ass(
+            captions,
+            request.caption_style,
+            work_dir,
+            font_path
+        )
+        captioned_path = os.path.join(work_dir, "captioned.mp4")
+        cmd = [
+            'ffmpeg', '-y',
+            '-i', video_path,
+            '-vf', f"ass={ass_file}",
+            '-c:a', 'copy',
+            captioned_path
+        ]
+        print(f"🎬 Applying captions from SRT...")
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode != 0:
+            print(f"❌ FFmpeg error: {result.stderr}")
+            return CaptionResponse(
+                status="error",
+                project_id=request.project_id,
+                error=f"Failed to add captions: {result.stderr[:200]}"
             )
+        current_video = captioned_path
         if request.title_overlay:
             titled_path = os.path.join(work_dir, "titled.mp4")
+            if create_text_overlay(current_video, titled_path, request.title_overlay):
+                current_video = titled_path
             else:
                 print("⚠️ Title overlay failed, continuing with captioned video")
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         final_filename = f"captioned_from_srt_{timestamp}.mp4"
+        final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
         shutil.rmtree(work_dir, ignore_errors=True)
         if final_url:
 async def root():
     return {
         "name": "Text Styling API with Auto Caption",
+        "version": "3.4.0",
         "features": {
             "title_overlay": "✅",
             "manual_captions": "✅",
             "auto_transcription": "✅" if WHISPER_AVAILABLE else "❌",
             "srt_support": "✅",
+            "enhanced_colors": f"✅ ({len(COLOR_MAP)} colors)",
+            "text_outline": "✅"
         },
         "endpoints": {
             "style": "POST /api/style - Add title overlay",