yukee1992 committed on
Commit
47fb9d5
·
verified ·
1 Parent(s): 00588c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -194
app.py CHANGED
@@ -202,6 +202,8 @@ class TextStyle(BaseModel):
202
  position: str = "center"
203
  margin: int = 20
204
  padding: int = 10
 
 
205
 
206
  class CaptionStyle(BaseModel):
207
  font_family: str
@@ -212,6 +214,8 @@ class CaptionStyle(BaseModel):
212
  margin: int = 20
213
  padding: int = 8
214
  max_width: int = 0 # 0 = no max width, otherwise will wrap text
 
 
215
 
216
  class CaptionSegment(BaseModel):
217
  text: str
@@ -466,7 +470,7 @@ def wrap_text_for_ass(text: str, max_width: int, font_family: str, font_size: in
466
 
467
 
468
  def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work_dir: str, font_path: str) -> str:
469
- """Create ASS subtitle file for captions with enhanced colors"""
470
 
471
  # Get actual font family name
472
  font_family_name = get_font_family_name(font_path)
@@ -474,21 +478,21 @@ def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work
474
 
475
  # Get font color and convert from RRGGBB to BBGGRR for ASS
476
  font_color_rgb = COLOR_MAP.get(style.color.lower(), "FFFFFF")
477
- # Convert RGB to BGR (swap first two and last two characters)
478
  font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
479
  print(f"🎨 Font color: {style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
480
 
 
 
 
 
 
481
  # Parse background color with proper alpha calculation
482
  bg_parts = style.bg_color.split('@')
483
  bg_color_name = bg_parts[0].lower()
484
- # Default opacity 0.5 if not specified
485
  bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
486
- # Clamp opacity between 0 and 1
487
  bg_opacity = max(0, min(1, bg_opacity))
488
  bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
489
- # Convert RGB to BGR for background
490
  bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
491
- # Alpha: 0 = opaque, 255 = transparent
492
  bg_alpha = int((1 - bg_opacity) * 255)
493
  print(f"🎨 BG color: {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
494
 
@@ -498,15 +502,15 @@ def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work
498
  "left": 4, "center": 5, "right": 6,
499
  "top-left": 7, "top-center": 8, "top-right": 9
500
  }
501
- alignment = pos_map.get(style.position, 2) # Default to bottom-center
502
 
503
  # Calculate margins
504
  margin_l = style.margin if alignment in [1,4,7] else 0
505
  margin_r = style.margin if alignment in [3,6,9] else 0
506
  margin_v = style.margin
507
 
508
- # Create ASS header with proper border settings
509
- # BorderStyle=3 means opaque box, Outline controls box padding
510
  ass_header = f"""[Script Info]
511
  ; Script generated by Video Styling Space - Auto Caption
512
  ScriptType: v4.00+
@@ -517,7 +521,7 @@ WrapStyle: 1
517
 
518
  [V4+ Styles]
519
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
520
- Style: Default,{font_family_name},{style.font_size},&H00{font_color_bgr},&H000000FF,&H00000000,&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,3,{style.padding},0,{alignment},{margin_l},{margin_r},{margin_v},1
521
 
522
  [Events]
523
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
@@ -526,11 +530,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
526
  # Add caption events
527
  ass_events = []
528
  for i, caption in enumerate(captions):
529
- # Format timestamps
530
  start = format_ass_time(caption.start_time)
531
  end = format_ass_time(caption.end_time)
532
 
533
- # Handle text wrapping if max_width specified
534
  text = caption.text
535
  if style.max_width > 0:
536
  text = wrap_text_for_ass(text, style.max_width, font_family_name, style.font_size)
@@ -542,17 +544,13 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
542
  with open(ass_file, 'w', encoding='utf-8') as f:
543
  f.write(ass_header + "\n".join(ass_events))
544
 
545
- # Debug: Print the ASS file content
546
  print("\n=== ASS FILE DEBUG ===")
547
  with open(ass_file, 'r', encoding='utf-8') as f:
548
  print(f.read())
549
  print("=== END ASS DEBUG ===\n")
550
 
551
  print(f"πŸ“ Created caption ASS file with {len(captions)} captions")
552
- print(f"πŸ“ Style line: BorderStyle=3, Outline={style.padding}, BackColor=&H{bg_alpha:02X}{bg_color_bgr}")
553
-
554
- # For captions, we need to use the ASS method (can't use drawtext for multiple timed captions)
555
- # But we've improved the ASS formatting for better background rendering
556
  return ass_file
557
 
558
  # =============================================
@@ -576,7 +574,6 @@ def parse_srt_file(srt_content: str) -> List[CaptionSegment]:
576
  lines = block.strip().split('\n')
577
  if len(lines) >= 3:
578
  # Skip the index number (first line)
579
- # Parse timestamp line (second line)
580
  timestamp_line = lines[1]
581
 
582
  # Parse timestamp format: 00:00:01,234 --> 00:00:04,567
@@ -603,10 +600,7 @@ def srt_time_to_seconds(time_str: str) -> float:
603
  Convert SRT timestamp to seconds
604
  Format: 00:00:01,234 or 00:00:01.234
605
  """
606
- # Replace comma with dot for millisecond parsing
607
  time_str = time_str.replace(',', '.')
608
-
609
- # Split into hours, minutes, seconds
610
  parts = time_str.split(':')
611
  if len(parts) == 3:
612
  hours = int(parts[0])
@@ -620,7 +614,7 @@ def srt_time_to_seconds(time_str: str) -> float:
620
  # =============================================
621
 
622
  def create_text_overlay(input_video, output_video, text_style):
623
- """Add text overlay using ASS subtitles with enhanced colors"""
624
  font_path = get_font_path(text_style.font_family)
625
  if not font_path:
626
  print(f"⚠️ Font not found: {text_style.font_family}")
@@ -646,19 +640,21 @@ def create_text_overlay(input_video, output_video, text_style):
646
 
647
  # Get font color and convert from RRGGBB to BBGGRR for ASS
648
  font_color_rgb = COLOR_MAP.get(text_style.color.lower(), "FFFFFF")
649
- # Convert RGB to BGR (swap first two and last two characters)
650
  font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
651
  print(f"🎨 Font color: {text_style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
652
 
 
 
 
 
 
653
  # Parse background color
654
  bg_parts = text_style.bg_color.split('@')
655
  bg_color_name = bg_parts[0].lower()
656
  bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
657
  bg_opacity = max(0, min(1, bg_opacity))
658
  bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
659
- # Convert RGB to BGR for background
660
  bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
661
- # Alpha: 0 = opaque, 255 = transparent
662
  bg_alpha = int((1 - bg_opacity) * 255)
663
  print(f"🎨 BG color: {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
664
 
@@ -686,12 +682,10 @@ def create_text_overlay(input_video, output_video, text_style):
686
  fc_file = os.path.join(work_dir, "fonts.conf")
687
  with open(fc_file, 'w') as f:
688
  f.write(fc_config)
689
-
690
- # Set environment variable for fontconfig
691
  os.environ['FONTCONFIG_FILE'] = fc_file
692
 
693
- # Create ASS file content with solid rectangle background
694
- # BorderStyle=3 gives solid box, Outline controls padding
695
  ass_content = f"""[Script Info]
696
  ; Script generated by Video Styling Space
697
  ScriptType: v4.00+
@@ -701,7 +695,7 @@ ScaledBorderAndShadow: yes
701
 
702
  [V4+ Styles]
703
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
704
- Style: Default,{font_family_name},{text_style.font_size},&H00{font_color_bgr},&H000000FF,&H00000000,&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,3,{text_style.padding},0,{alignment},{margin_l},{margin_r},{margin_v},1
705
 
706
  [Events]
707
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
@@ -718,56 +712,26 @@ Dialogue: 0,0:00:00.00,0:00:10.00,Default,,0,0,0,,{text_style.text}"""
718
  print("=== END ASS DEBUG ===\n")
719
 
720
  print(f"πŸ“ Created ASS subtitle file with font family: {font_family_name}")
721
- print(f"πŸ“ Style line: BorderStyle=3, Outline={text_style.padding}, BackColor=&H{bg_alpha:02X}{bg_color_bgr}")
722
-
723
- # Try drawtext method first (more reliable for backgrounds)
724
- print("🎬 Using drawtext method for reliable backgrounds...")
725
- drawtext_pos = {
726
- "bottom-left": "x=20:y=h-th-20",
727
- "bottom-center": "x=(w-tw)/2:y=h-th-20",
728
- "bottom-right": "x=w-tw-20:y=h-th-20",
729
- "center": "x=(w-tw)/2:y=(h-th)/2",
730
- "top-left": "x=20:y=20",
731
- "top-center": "x=(w-tw)/2:y=20",
732
- "top-right": "x=w-tw-20:y=20"
733
- }
734
- position = drawtext_pos.get(text_style.position, "x=(w-tw)/2:y=(h-th)/2")
735
 
736
- # Use the actual color name for drawtext (without round parameter)
737
- drawtext_cmd = [
738
  'ffmpeg', '-y',
739
  '-i', input_video,
740
- '-vf', f"drawtext=text='{text_style.text}':fontfile={font_path}:fontsize={text_style.font_size}:fontcolor={text_style.color}:{position}:box=1:boxcolor={bg_color_name}@{bg_opacity}:boxborderw={text_style.padding}",
741
  '-c:a', 'copy',
742
  output_video
743
  ]
744
 
745
- print(f"🎬 Running drawtext command...")
746
- result = subprocess.run(drawtext_cmd, capture_output=True, text=True)
747
 
748
  if result.returncode == 0:
749
- print(f"βœ… Drawtext method succeeded with solid background")
750
  return True
751
  else:
752
- print(f"❌ Drawtext failed: {result.stderr}")
753
- print("πŸ”„ Falling back to ASS method...")
754
-
755
- # Try ASS method as fallback
756
- cmd = [
757
- 'ffmpeg', '-y',
758
- '-i', input_video,
759
- '-vf', f"ass={ass_file}",
760
- '-c:a', 'copy',
761
- output_video
762
- ]
763
-
764
- result2 = subprocess.run(cmd, capture_output=True, text=True)
765
- if result2.returncode == 0:
766
- print(f"βœ… ASS method succeeded")
767
- return True
768
- else:
769
- print(f"❌ ASS method also failed: {result2.stderr}")
770
- return False
771
 
772
  # =============================================
773
  # DEBUG ENDPOINTS
@@ -800,21 +764,16 @@ async def debug_characters(font_name: str):
800
 
801
  images = []
802
  for label, text in test_texts:
803
- # Create image
804
  img = Image.new('RGB', (1200, 400), color='white')
805
  d = ImageDraw.Draw(img)
806
 
807
  try:
808
- # Try to load font with size 24
809
  font = ImageFont.truetype(font_path, 24)
810
- # Draw label
811
  d.text((10, 10), f"{label}:", fill='black', font=font)
812
- # Draw text (wrap if too long)
813
  d.text((10, 50), str(text)[:200], fill='black', font=font)
814
  except Exception as e:
815
  d.text((10, 10), f"Error: {str(e)}", fill='red', font=ImageFont.load_default())
816
 
817
- # Convert to base64
818
  buffered = BytesIO()
819
  img.save(buffered, format="PNG")
820
  img_base64 = base64.b64encode(buffered.getvalue()).decode()
@@ -852,7 +811,6 @@ async def font_info(font_name: str):
852
  "fonttools_available": FONTTOOLS_AVAILABLE
853
  }
854
 
855
- # Try to get basic info with PIL
856
  if PIL_AVAILABLE:
857
  try:
858
  font = ImageFont.truetype(font_path, 20)
@@ -861,13 +819,11 @@ async def font_info(font_name: str):
861
  info["pil_loads"] = False
862
  info["pil_error"] = str(e)
863
 
864
- # Try to get detailed info with fontTools
865
  if FONTTOOLS_AVAILABLE:
866
  try:
867
  from fontTools import ttLib
868
  font = ttLib.TTFont(font_path)
869
 
870
- # Get font name
871
  name_records = {}
872
  for record in font['name'].names:
873
  try:
@@ -884,12 +840,11 @@ async def font_info(font_name: str):
884
 
885
  info["names"] = name_records
886
 
887
- # Check for Chinese glyphs
888
  cmap = font.getBestCmap()
889
  chinese_ranges = [
890
- (0x4E00, 0x9FFF), # CJK Unified Ideographs
891
- (0x3400, 0x4DBF), # Extension A
892
- (0x20000, 0x2A6DF), # Extension B
893
  ]
894
 
895
  has_chinese = False
@@ -943,7 +898,6 @@ async def test_chars(font_name: str):
943
  import base64
944
  from io import BytesIO
945
 
946
- # Test different character sets
947
  test_strings = [
948
  ("English", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
949
  ("Numbers", "0123456789"),
@@ -954,7 +908,6 @@ async def test_chars(font_name: str):
954
 
955
  results = []
956
  for label, text in test_strings:
957
- # Create image
958
  img = Image.new('RGB', (800, 200), color='white')
959
  d = ImageDraw.Draw(img)
960
 
@@ -962,19 +915,17 @@ async def test_chars(font_name: str):
962
  font = ImageFont.truetype(font_path, 36)
963
  d.text((10, 50), f"{label}: {text}", fill='black', font=font)
964
 
965
- # Convert to base64
966
  buffered = BytesIO()
967
  img.save(buffered, format="PNG")
968
  img_base64 = base64.b64encode(buffered.getvalue()).decode()
969
  results.append({
970
  "label": label,
971
  "text": text,
972
- "image": img_base64[:100] + "..." # Truncate for display
973
  })
974
  except Exception as e:
975
  results.append({"label": label, "error": str(e)})
976
 
977
- # Also return font info
978
  import subprocess
979
  fc_list = subprocess.run(['fc-list', font_path], capture_output=True, text=True)
980
 
@@ -1002,10 +953,8 @@ async def test_transcription(audio_url: str, language: str = "zh"):
1002
  audio_path = os.path.join(work_dir, "test_audio.mp3")
1003
  download_file(audio_url, audio_path)
1004
 
1005
- # Test transcription
1006
  captions = transcribe_audio_to_captions(audio_path, language)
1007
 
1008
- # Format for display
1009
  result = {
1010
  "status": "success",
1011
  "caption_count": len(captions),
@@ -1015,7 +964,7 @@ async def test_transcription(audio_url: str, language: str = "zh"):
1015
  "start": c.start_time,
1016
  "end": c.end_time
1017
  }
1018
- for c in captions[:10] # Show first 10
1019
  ],
1020
  "full_transcript": " ".join([c.text for c in captions])[:500] + "..." if captions else ""
1021
  }
@@ -1029,17 +978,14 @@ async def test_transcription(audio_url: str, language: str = "zh"):
1029
  async def test_srt_parsing(project_id: str, srt_filename: str):
1030
  """Test parsing an SRT file from your dataset"""
1031
  try:
1032
- # Download SRT file
1033
  srt_url = f"https://huggingface.co/datasets/{VIDEO_DATASET}/resolve/main/data/projects/{project_id}/subtitles/{srt_filename}"
1034
 
1035
  response = requests.get(srt_url)
1036
  if response.status_code != 200:
1037
  return {"error": f"Failed to download SRT: {response.status_code}"}
1038
 
1039
- # Parse
1040
  captions = parse_srt_file(response.text)
1041
 
1042
- # Return preview
1043
  return {
1044
  "status": "success",
1045
  "filename": srt_filename,
@@ -1051,7 +997,7 @@ async def test_srt_parsing(project_id: str, srt_filename: str):
1051
  "start": c.start_time,
1052
  "end": c.end_time
1053
  }
1054
- for i, c in enumerate(captions[:5]) # Show first 5
1055
  ]
1056
  }
1057
  except Exception as e:
@@ -1077,7 +1023,6 @@ async def test_ass_colors():
1077
  work_dir = "/tmp/ass_test"
1078
  os.makedirs(work_dir, exist_ok=True)
1079
 
1080
- # Create a test ASS file with various colors
1081
  ass_content = """[Script Info]
1082
  ; Script generated for color testing
1083
  ScriptType: v4.00+
@@ -1097,7 +1042,6 @@ Dialogue: 0,0:00:05.00,0:00:10.00,Default,,0,0,0,,Dark Purple (should be indigo)
1097
  with open(ass_file, 'w', encoding='utf-8') as f:
1098
  f.write(ass_content)
1099
 
1100
- # Read it back to verify
1101
  with open(ass_file, 'r', encoding='utf-8') as f:
1102
  content = f.read()
1103
 
@@ -1138,13 +1082,11 @@ async def style_video(request: StylingRequest):
1138
  work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
1139
  os.makedirs(work_dir, exist_ok=True)
1140
 
1141
- # Download video
1142
  video_path = os.path.join(work_dir, "input.mp4")
1143
  download_file(request.video_url, video_path)
1144
 
1145
  current_video = video_path
1146
 
1147
- # Add title overlay if provided
1148
  if request.title_overlay:
1149
  titled_path = os.path.join(work_dir, "titled.mp4")
1150
  if create_text_overlay(current_video, titled_path, request.title_overlay):
@@ -1156,12 +1098,10 @@ async def style_video(request: StylingRequest):
1156
  error="Failed to add text overlay"
1157
  )
1158
 
1159
- # Upload styled video
1160
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1161
  styled_filename = f"styled_{timestamp}.mp4"
1162
  styled_url = upload_to_dataset(current_video, request.project_id, styled_filename, "videos")
1163
 
1164
- # Cleanup
1165
  shutil.rmtree(work_dir, ignore_errors=True)
1166
 
1167
  if styled_url:
@@ -1194,13 +1134,11 @@ async def add_captions_to_video(request: CaptionRequest):
1194
  work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
1195
  os.makedirs(work_dir, exist_ok=True)
1196
 
1197
- # Download video
1198
  video_path = os.path.join(work_dir, "input.mp4")
1199
  download_file(request.video_url, video_path)
1200
 
1201
  current_video = video_path
1202
 
1203
- # Get font path for captions
1204
  font_path = get_font_path(request.caption_style.font_family)
1205
  if not font_path:
1206
  return CaptionResponse(
@@ -1209,7 +1147,6 @@ async def add_captions_to_video(request: CaptionRequest):
1209
  error=f"Caption font not found: {request.caption_style.font_family}"
1210
  )
1211
 
1212
- # Create fontconfig config
1213
  font_dir = os.path.dirname(font_path)
1214
  fc_config = f"""<?xml version="1.0"?>
1215
  <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
@@ -1222,7 +1159,6 @@ async def add_captions_to_video(request: CaptionRequest):
1222
  f.write(fc_config)
1223
  os.environ['FONTCONFIG_FILE'] = fc_file
1224
 
1225
- # Create caption ASS file
1226
  ass_file = create_caption_ass(
1227
  request.captions,
1228
  request.caption_style,
@@ -1230,7 +1166,6 @@ async def add_captions_to_video(request: CaptionRequest):
1230
  font_path
1231
  )
1232
 
1233
- # Apply captions to video
1234
  captioned_path = os.path.join(work_dir, "captioned.mp4")
1235
 
1236
  cmd = [
@@ -1254,7 +1189,6 @@ async def add_captions_to_video(request: CaptionRequest):
1254
 
1255
  current_video = captioned_path
1256
 
1257
- # Add title overlay if provided
1258
  if request.title_overlay:
1259
  titled_path = os.path.join(work_dir, "titled.mp4")
1260
  if create_text_overlay(current_video, titled_path, request.title_overlay):
@@ -1262,12 +1196,10 @@ async def add_captions_to_video(request: CaptionRequest):
1262
  else:
1263
  print("⚠️ Title overlay failed, continuing with captioned video")
1264
 
1265
- # Upload final video
1266
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1267
  final_filename = f"captioned_{timestamp}.mp4"
1268
  final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
1269
 
1270
- # Cleanup
1271
  shutil.rmtree(work_dir, ignore_errors=True)
1272
 
1273
  if final_url:
@@ -1306,19 +1238,15 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1306
  work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
1307
  os.makedirs(work_dir, exist_ok=True)
1308
 
1309
- # Download video
1310
  video_path = os.path.join(work_dir, "input.mp4")
1311
  download_file(request.video_url, video_path)
1312
 
1313
- # Get audio for transcription
1314
  audio_path = os.path.join(work_dir, "audio.mp3")
1315
 
1316
  if request.audio_url:
1317
- # Download provided MP3
1318
  print(f"πŸ“₯ Downloading audio from: {request.audio_url}")
1319
  download_file(request.audio_url, audio_path)
1320
  else:
1321
- # Extract audio from video
1322
  print("🎡 No audio URL provided, extracting from video...")
1323
  if not extract_audio_from_video(video_path, audio_path):
1324
  return TranscriptionResponse(
@@ -1327,7 +1255,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1327
  error="Failed to extract audio from video"
1328
  )
1329
 
1330
- # Transcribe audio to captions
1331
  print("πŸ“ Transcribing audio...")
1332
  try:
1333
  captions = transcribe_audio_to_captions(audio_path, request.language)
@@ -1345,7 +1272,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1345
  error="No captions generated from audio"
1346
  )
1347
 
1348
- # Get font path for captions
1349
  font_path = get_font_path(request.caption_style.font_family)
1350
  if not font_path:
1351
  return TranscriptionResponse(
@@ -1354,7 +1280,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1354
  error=f"Caption font not found: {request.caption_style.font_family}"
1355
  )
1356
 
1357
- # Create fontconfig
1358
  font_dir = os.path.dirname(font_path)
1359
  fc_config = f"""<?xml version="1.0"?>
1360
  <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
@@ -1367,7 +1292,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1367
  f.write(fc_config)
1368
  os.environ['FONTCONFIG_FILE'] = fc_file
1369
 
1370
- # Create caption ASS file
1371
  ass_file = create_caption_ass(
1372
  captions,
1373
  request.caption_style,
@@ -1375,7 +1299,6 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1375
  font_path
1376
  )
1377
 
1378
- # Apply captions to video
1379
  captioned_path = os.path.join(work_dir, "captioned.mp4")
1380
 
1381
  cmd = [
@@ -1399,21 +1322,17 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1399
 
1400
  current_video = captioned_path
1401
 
1402
- # Add title overlay if provided
1403
  if request.title_overlay:
1404
  titled_path = os.path.join(work_dir, "titled.mp4")
1405
  if create_text_overlay(current_video, titled_path, request.title_overlay):
1406
  current_video = titled_path
1407
 
1408
- # Upload final video
1409
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1410
  final_filename = f"transcribed_{timestamp}.mp4"
1411
  final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
1412
 
1413
- # Get plain transcript
1414
  transcript = " ".join([c.text for c in captions])
1415
 
1416
- # Cleanup
1417
  shutil.rmtree(work_dir, ignore_errors=True)
1418
 
1419
  if final_url:
@@ -1444,7 +1363,7 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1444
  @app.post("/api/caption-from-srt", response_model=CaptionResponse)
1445
  async def add_captions_from_srt(request: SrtCaptionRequest):
1446
  """
1447
- Add captions to video using SRT file from URL - Using drawtext for each caption
1448
  """
1449
  try:
1450
  print(f"\nπŸ“ Adding captions from SRT URL for project: {request.project_id}")
@@ -1452,11 +1371,9 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
1452
  work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
1453
  os.makedirs(work_dir, exist_ok=True)
1454
 
1455
- # Download video
1456
  video_path = os.path.join(work_dir, "input.mp4")
1457
  download_file(request.video_url, video_path)
1458
 
1459
- # Download SRT file from URL
1460
  srt_path = os.path.join(work_dir, "subtitles.srt")
1461
  try:
1462
  download_file(request.srt_url, srt_path)
@@ -1468,7 +1385,6 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
1468
  error=f"Failed to download SRT file from URL: {str(e)}"
1469
  )
1470
 
1471
- # Parse SRT file
1472
  with open(srt_path, 'r', encoding='utf-8') as f:
1473
  srt_content = f.read()
1474
 
@@ -1483,7 +1399,6 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
1483
 
1484
  print(f"βœ… Parsed {len(captions)} captions from SRT")
1485
 
1486
- # Get font path for captions
1487
  font_path = get_font_path(request.caption_style.font_family)
1488
  if not font_path:
1489
  return CaptionResponse(
@@ -1492,7 +1407,6 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
1492
  error=f"Caption font not found: {request.caption_style.font_family}"
1493
  )
1494
 
1495
- # Create fontconfig
1496
  font_dir = os.path.dirname(font_path)
1497
  fc_config = f"""<?xml version="1.0"?>
1498
  <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
@@ -1505,85 +1419,47 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
1505
  f.write(fc_config)
1506
  os.environ['FONTCONFIG_FILE'] = fc_file
1507
 
1508
- # Get margin value from request
1509
- margin = request.caption_style.margin
1510
- print(f"πŸ“ Using margin: {margin} pixels")
1511
-
1512
- # Map position to drawtext position with margin
1513
- drawtext_pos = {
1514
- "bottom-left": f"x={margin}:y=h-th-{margin}",
1515
- "bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
1516
- "bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
1517
- "center": "x=(w-tw)/2:y=(h-th)/2",
1518
- "left": f"x={margin}:y=(h-th)/2",
1519
- "right": f"x=w-tw-{margin}:y=(h-th)/2",
1520
- "top-left": f"x={margin}:y={margin}",
1521
- "top-center": f"x=(w-tw)/2:y={margin}",
1522
- "top-right": f"x=w-tw-{margin}:y={margin}"
1523
- }
1524
- position = drawtext_pos.get(request.caption_style.position, f"x=(w-tw)/2:y=h-th-{margin}")
1525
- print(f"πŸ“ Position string: {position}")
1526
 
1527
- # Process each caption with its own drawtext filter
1528
- current_input = video_path
1529
 
1530
- for i, caption in enumerate(captions):
1531
- # Create a temporary file for this step
1532
- temp_output = os.path.join(work_dir, f"step_{i:03d}.mp4")
1533
-
1534
- # Build drawtext filter for this caption with margin (no round parameter)
1535
- drawtext_filter = (
1536
- f"drawtext=text='{caption.text}':"
1537
- f"fontfile={font_path}:"
1538
- f"fontsize={request.caption_style.font_size}:"
1539
- f"fontcolor={request.caption_style.color}:"
1540
- f"{position}:"
1541
- f"box=1:"
1542
- f"boxcolor={request.caption_style.bg_color}:"
1543
- f"boxborderw={request.caption_style.padding}:"
1544
- f"enable='between(t,{caption.start_time},{caption.end_time})'"
 
 
1545
  )
1546
-
1547
- # Apply filter to current input
1548
- cmd = [
1549
- 'ffmpeg', '-y',
1550
- '-i', current_input,
1551
- '-vf', drawtext_filter,
1552
- '-c:a', 'copy',
1553
- temp_output
1554
- ]
1555
-
1556
- print(f"🎬 Adding caption {i+1}/{len(captions)}: '{caption.text}' at margin {margin}")
1557
- result = subprocess.run(cmd, capture_output=True, text=True)
1558
-
1559
- if result.returncode != 0:
1560
- print(f"❌ FFmpeg error at caption {i+1}: {result.stderr}")
1561
- return CaptionResponse(
1562
- status="error",
1563
- project_id=request.project_id,
1564
- error=f"Failed to add caption {i+1}: {result.stderr[:200]}"
1565
- )
1566
-
1567
- # Update for next iteration
1568
- current_input = temp_output
1569
 
1570
- # Final output is the last temp file
1571
- final_video = current_input
1572
 
1573
- # Add title overlay if provided
1574
  if request.title_overlay:
1575
  titled_path = os.path.join(work_dir, "titled.mp4")
1576
- if create_text_overlay(final_video, titled_path, request.title_overlay):
1577
- final_video = titled_path
1578
  else:
1579
  print("⚠️ Title overlay failed, continuing with captioned video")
1580
 
1581
- # Upload final video
1582
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1583
  final_filename = f"captioned_from_srt_{timestamp}.mp4"
1584
- final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
1585
 
1586
- # Cleanup
1587
  shutil.rmtree(work_dir, ignore_errors=True)
1588
 
1589
  if final_url:
@@ -1614,13 +1490,14 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
1614
  async def root():
1615
  return {
1616
  "name": "Text Styling API with Auto Caption",
1617
- "version": "3.2.0",
1618
  "features": {
1619
  "title_overlay": "✅",
1620
  "manual_captions": "✅",
1621
  "auto_transcription": "✅" if WHISPER_AVAILABLE else "❌",
1622
  "srt_support": "✅",
1623
- "enhanced_colors": f"✅ ({len(COLOR_MAP)} colors)"
 
1624
  },
1625
  "endpoints": {
1626
  "style": "POST /api/style - Add title overlay",
 
202
  position: str = "center"
203
  margin: int = 20
204
  padding: int = 10
205
+ outline_width: int = 0 # Width of text outline (0 = no outline)
206
+ outline_color: str = "black" # Color of the outline
207
 
208
  class CaptionStyle(BaseModel):
209
  font_family: str
 
214
  margin: int = 20
215
  padding: int = 8
216
  max_width: int = 0 # 0 = no max width, otherwise will wrap text
217
+ outline_width: int = 0 # Width of text outline (0 = no outline)
218
+ outline_color: str = "black" # Color of the outline
219
 
220
  class CaptionSegment(BaseModel):
221
  text: str
 
470
 
471
 
472
  def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work_dir: str, font_path: str) -> str:
473
+ """Create ASS subtitle file for captions with enhanced colors and outlines"""
474
 
475
  # Get actual font family name
476
  font_family_name = get_font_family_name(font_path)
 
478
 
479
  # Get font color and convert from RRGGBB to BBGGRR for ASS
480
  font_color_rgb = COLOR_MAP.get(style.color.lower(), "FFFFFF")
 
481
  font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
482
  print(f"🎨 Font color: {style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
483
 
484
+ # Get outline color and convert to BGR
485
+ outline_color_rgb = COLOR_MAP.get(style.outline_color.lower(), "000000")
486
+ outline_color_bgr = outline_color_rgb[4:6] + outline_color_rgb[2:4] + outline_color_rgb[0:2]
487
+ print(f"✏️ Outline color: {style.outline_color} -> RGB={outline_color_rgb} -> BGR={outline_color_bgr}, Width={style.outline_width}")
488
+
489
  # Parse background color with proper alpha calculation
490
  bg_parts = style.bg_color.split('@')
491
  bg_color_name = bg_parts[0].lower()
 
492
  bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
 
493
  bg_opacity = max(0, min(1, bg_opacity))
494
  bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
 
495
  bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
 
496
  bg_alpha = int((1 - bg_opacity) * 255)
497
  print(f"🎨 BG color: {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
498
 
 
502
  "left": 4, "center": 5, "right": 6,
503
  "top-left": 7, "top-center": 8, "top-right": 9
504
  }
505
+ alignment = pos_map.get(style.position, 2)
506
 
507
  # Calculate margins
508
  margin_l = style.margin if alignment in [1,4,7] else 0
509
  margin_r = style.margin if alignment in [3,6,9] else 0
510
  margin_v = style.margin
511
 
512
+ # Create ASS header with proper border settings and outline
513
+ # BorderStyle=1 with outline creates text border
514
  ass_header = f"""[Script Info]
515
  ; Script generated by Video Styling Space - Auto Caption
516
  ScriptType: v4.00+
 
521
 
522
  [V4+ Styles]
523
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
524
+ Style: Default,{font_family_name},{style.font_size},&H00{font_color_bgr},&H000000FF,&H{outline_color_bgr},&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,1,{style.outline_width},0,{alignment},{margin_l},{margin_r},{margin_v},1
525
 
526
  [Events]
527
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
530
  # Add caption events
531
  ass_events = []
532
  for i, caption in enumerate(captions):
 
533
  start = format_ass_time(caption.start_time)
534
  end = format_ass_time(caption.end_time)
535
 
 
536
  text = caption.text
537
  if style.max_width > 0:
538
  text = wrap_text_for_ass(text, style.max_width, font_family_name, style.font_size)
 
544
  with open(ass_file, 'w', encoding='utf-8') as f:
545
  f.write(ass_header + "\n".join(ass_events))
546
 
 
547
  print("\n=== ASS FILE DEBUG ===")
548
  with open(ass_file, 'r', encoding='utf-8') as f:
549
  print(f.read())
550
  print("=== END ASS DEBUG ===\n")
551
 
552
  print(f"📝 Created caption ASS file with {len(captions)} captions")
553
+ print(f"πŸ“ Style line: BorderStyle=1, Outline={style.outline_width}, OutlineColor=&H{outline_color_bgr}")
 
 
 
554
  return ass_file
555
 
556
  # =============================================
 
574
  lines = block.strip().split('\n')
575
  if len(lines) >= 3:
576
  # Skip the index number (first line)
 
577
  timestamp_line = lines[1]
578
 
579
  # Parse timestamp format: 00:00:01,234 --> 00:00:04,567
 
600
  Convert SRT timestamp to seconds
601
  Format: 00:00:01,234 or 00:00:01.234
602
  """
 
603
  time_str = time_str.replace(',', '.')
 
 
604
  parts = time_str.split(':')
605
  if len(parts) == 3:
606
  hours = int(parts[0])
 
614
  # =============================================
615
 
616
  def create_text_overlay(input_video, output_video, text_style):
617
+ """Add text overlay using ASS subtitles with enhanced colors and outlines"""
618
  font_path = get_font_path(text_style.font_family)
619
  if not font_path:
620
  print(f"⚠️ Font not found: {text_style.font_family}")
 
640
 
641
  # Get font color and convert from RRGGBB to BBGGRR for ASS
642
  font_color_rgb = COLOR_MAP.get(text_style.color.lower(), "FFFFFF")
 
643
  font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
644
  print(f"🎨 Font color: {text_style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
645
 
646
+ # Get outline color and convert to BGR
647
+ outline_color_rgb = COLOR_MAP.get(text_style.outline_color.lower(), "000000")
648
+ outline_color_bgr = outline_color_rgb[4:6] + outline_color_rgb[2:4] + outline_color_rgb[0:2]
649
+ print(f"✏️ Outline color: {text_style.outline_color} -> RGB={outline_color_rgb} -> BGR={outline_color_bgr}, Width={text_style.outline_width}")
650
+
651
  # Parse background color
652
  bg_parts = text_style.bg_color.split('@')
653
  bg_color_name = bg_parts[0].lower()
654
  bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
655
  bg_opacity = max(0, min(1, bg_opacity))
656
  bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
 
657
  bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
 
658
  bg_alpha = int((1 - bg_opacity) * 255)
659
  print(f"🎨 BG color: {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
660
 
 
682
  fc_file = os.path.join(work_dir, "fonts.conf")
683
  with open(fc_file, 'w') as f:
684
  f.write(fc_config)
 
 
685
  os.environ['FONTCONFIG_FILE'] = fc_file
686
 
687
+ # Create ASS file content with solid rectangle background and outline
688
+ # BorderStyle=1 with outline creates text border
689
  ass_content = f"""[Script Info]
690
  ; Script generated by Video Styling Space
691
  ScriptType: v4.00+
 
695
 
696
  [V4+ Styles]
697
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
698
+ Style: Default,{font_family_name},{text_style.font_size},&H00{font_color_bgr},&H000000FF,&H{outline_color_bgr},&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,1,{text_style.outline_width},0,{alignment},{margin_l},{margin_r},{margin_v},1
699
 
700
  [Events]
701
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
712
  print("=== END ASS DEBUG ===\n")
713
 
714
  print(f"📝 Created ASS subtitle file with font family: {font_family_name}")
715
+ print(f"πŸ“ Style line: BorderStyle=1, Outline={text_style.outline_width}, OutlineColor=&H{outline_color_bgr}")
 
 
 
 
 
 
 
 
 
 
 
 
 
716
 
717
+ # Run FFmpeg with ASS filter
718
+ cmd = [
719
  'ffmpeg', '-y',
720
  '-i', input_video,
721
+ '-vf', f"ass={ass_file}",
722
  '-c:a', 'copy',
723
  output_video
724
  ]
725
 
726
+ print(f"🎬 Running FFmpeg with ASS filter...")
727
+ result = subprocess.run(cmd, capture_output=True, text=True)
728
 
729
  if result.returncode == 0:
730
+ print(f"✅ ASS method succeeded with outline")
731
  return True
732
  else:
733
+ print(f"❌ ASS method failed: {result.stderr}")
734
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
 
736
  # =============================================
737
  # DEBUG ENDPOINTS
 
764
 
765
  images = []
766
  for label, text in test_texts:
 
767
  img = Image.new('RGB', (1200, 400), color='white')
768
  d = ImageDraw.Draw(img)
769
 
770
  try:
 
771
  font = ImageFont.truetype(font_path, 24)
 
772
  d.text((10, 10), f"{label}:", fill='black', font=font)
 
773
  d.text((10, 50), str(text)[:200], fill='black', font=font)
774
  except Exception as e:
775
  d.text((10, 10), f"Error: {str(e)}", fill='red', font=ImageFont.load_default())
776
 
 
777
  buffered = BytesIO()
778
  img.save(buffered, format="PNG")
779
  img_base64 = base64.b64encode(buffered.getvalue()).decode()
 
811
  "fonttools_available": FONTTOOLS_AVAILABLE
812
  }
813
 
 
814
  if PIL_AVAILABLE:
815
  try:
816
  font = ImageFont.truetype(font_path, 20)
 
819
  info["pil_loads"] = False
820
  info["pil_error"] = str(e)
821
 
 
822
  if FONTTOOLS_AVAILABLE:
823
  try:
824
  from fontTools import ttLib
825
  font = ttLib.TTFont(font_path)
826
 
 
827
  name_records = {}
828
  for record in font['name'].names:
829
  try:
 
840
 
841
  info["names"] = name_records
842
 
 
843
  cmap = font.getBestCmap()
844
  chinese_ranges = [
845
+ (0x4E00, 0x9FFF),
846
+ (0x3400, 0x4DBF),
847
+ (0x20000, 0x2A6DF),
848
  ]
849
 
850
  has_chinese = False
 
898
  import base64
899
  from io import BytesIO
900
 
 
901
  test_strings = [
902
  ("English", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
903
  ("Numbers", "0123456789"),
 
908
 
909
  results = []
910
  for label, text in test_strings:
 
911
  img = Image.new('RGB', (800, 200), color='white')
912
  d = ImageDraw.Draw(img)
913
 
 
915
  font = ImageFont.truetype(font_path, 36)
916
  d.text((10, 50), f"{label}: {text}", fill='black', font=font)
917
 
 
918
  buffered = BytesIO()
919
  img.save(buffered, format="PNG")
920
  img_base64 = base64.b64encode(buffered.getvalue()).decode()
921
  results.append({
922
  "label": label,
923
  "text": text,
924
+ "image": img_base64[:100] + "..."
925
  })
926
  except Exception as e:
927
  results.append({"label": label, "error": str(e)})
928
 
 
929
  import subprocess
930
  fc_list = subprocess.run(['fc-list', font_path], capture_output=True, text=True)
931
 
 
953
  audio_path = os.path.join(work_dir, "test_audio.mp3")
954
  download_file(audio_url, audio_path)
955
 
 
956
  captions = transcribe_audio_to_captions(audio_path, language)
957
 
 
958
  result = {
959
  "status": "success",
960
  "caption_count": len(captions),
 
964
  "start": c.start_time,
965
  "end": c.end_time
966
  }
967
+ for c in captions[:10]
968
  ],
969
  "full_transcript": " ".join([c.text for c in captions])[:500] + "..." if captions else ""
970
  }
 
978
  async def test_srt_parsing(project_id: str, srt_filename: str):
979
  """Test parsing an SRT file from your dataset"""
980
  try:
 
981
  srt_url = f"https://huggingface.co/datasets/{VIDEO_DATASET}/resolve/main/data/projects/{project_id}/subtitles/{srt_filename}"
982
 
983
  response = requests.get(srt_url)
984
  if response.status_code != 200:
985
  return {"error": f"Failed to download SRT: {response.status_code}"}
986
 
 
987
  captions = parse_srt_file(response.text)
988
 
 
989
  return {
990
  "status": "success",
991
  "filename": srt_filename,
 
997
  "start": c.start_time,
998
  "end": c.end_time
999
  }
1000
+ for i, c in enumerate(captions[:5])
1001
  ]
1002
  }
1003
  except Exception as e:
 
1023
  work_dir = "/tmp/ass_test"
1024
  os.makedirs(work_dir, exist_ok=True)
1025
 
 
1026
  ass_content = """[Script Info]
1027
  ; Script generated for color testing
1028
  ScriptType: v4.00+
 
1042
  with open(ass_file, 'w', encoding='utf-8') as f:
1043
  f.write(ass_content)
1044
 
 
1045
  with open(ass_file, 'r', encoding='utf-8') as f:
1046
  content = f.read()
1047
 
 
1082
  work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
1083
  os.makedirs(work_dir, exist_ok=True)
1084
 
 
1085
  video_path = os.path.join(work_dir, "input.mp4")
1086
  download_file(request.video_url, video_path)
1087
 
1088
  current_video = video_path
1089
 
 
1090
  if request.title_overlay:
1091
  titled_path = os.path.join(work_dir, "titled.mp4")
1092
  if create_text_overlay(current_video, titled_path, request.title_overlay):
 
1098
  error="Failed to add text overlay"
1099
  )
1100
 
 
1101
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1102
  styled_filename = f"styled_{timestamp}.mp4"
1103
  styled_url = upload_to_dataset(current_video, request.project_id, styled_filename, "videos")
1104
 
 
1105
  shutil.rmtree(work_dir, ignore_errors=True)
1106
 
1107
  if styled_url:
 
1134
  work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
1135
  os.makedirs(work_dir, exist_ok=True)
1136
 
 
1137
  video_path = os.path.join(work_dir, "input.mp4")
1138
  download_file(request.video_url, video_path)
1139
 
1140
  current_video = video_path
1141
 
 
1142
  font_path = get_font_path(request.caption_style.font_family)
1143
  if not font_path:
1144
  return CaptionResponse(
 
1147
  error=f"Caption font not found: {request.caption_style.font_family}"
1148
  )
1149
 
 
1150
  font_dir = os.path.dirname(font_path)
1151
  fc_config = f"""<?xml version="1.0"?>
1152
  <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
 
1159
  f.write(fc_config)
1160
  os.environ['FONTCONFIG_FILE'] = fc_file
1161
 
 
1162
  ass_file = create_caption_ass(
1163
  request.captions,
1164
  request.caption_style,
 
1166
  font_path
1167
  )
1168
 
 
1169
  captioned_path = os.path.join(work_dir, "captioned.mp4")
1170
 
1171
  cmd = [
 
1189
 
1190
  current_video = captioned_path
1191
 
 
1192
  if request.title_overlay:
1193
  titled_path = os.path.join(work_dir, "titled.mp4")
1194
  if create_text_overlay(current_video, titled_path, request.title_overlay):
 
1196
  else:
1197
  print("⚠️ Title overlay failed, continuing with captioned video")
1198
 
 
1199
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1200
  final_filename = f"captioned_{timestamp}.mp4"
1201
  final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
1202
 
 
1203
  shutil.rmtree(work_dir, ignore_errors=True)
1204
 
1205
  if final_url:
 
1238
  work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
1239
  os.makedirs(work_dir, exist_ok=True)
1240
 
 
1241
  video_path = os.path.join(work_dir, "input.mp4")
1242
  download_file(request.video_url, video_path)
1243
 
 
1244
  audio_path = os.path.join(work_dir, "audio.mp3")
1245
 
1246
  if request.audio_url:
 
1247
  print(f"📥 Downloading audio from: {request.audio_url}")
1248
  download_file(request.audio_url, audio_path)
1249
  else:
 
1250
  print("🎡 No audio URL provided, extracting from video...")
1251
  if not extract_audio_from_video(video_path, audio_path):
1252
  return TranscriptionResponse(
 
1255
  error="Failed to extract audio from video"
1256
  )
1257
 
 
1258
  print("📝 Transcribing audio...")
1259
  try:
1260
  captions = transcribe_audio_to_captions(audio_path, request.language)
 
1272
  error="No captions generated from audio"
1273
  )
1274
 
 
1275
  font_path = get_font_path(request.caption_style.font_family)
1276
  if not font_path:
1277
  return TranscriptionResponse(
 
1280
  error=f"Caption font not found: {request.caption_style.font_family}"
1281
  )
1282
 
 
1283
  font_dir = os.path.dirname(font_path)
1284
  fc_config = f"""<?xml version="1.0"?>
1285
  <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
 
1292
  f.write(fc_config)
1293
  os.environ['FONTCONFIG_FILE'] = fc_file
1294
 
 
1295
  ass_file = create_caption_ass(
1296
  captions,
1297
  request.caption_style,
 
1299
  font_path
1300
  )
1301
 
 
1302
  captioned_path = os.path.join(work_dir, "captioned.mp4")
1303
 
1304
  cmd = [
 
1322
 
1323
  current_video = captioned_path
1324
 
 
1325
  if request.title_overlay:
1326
  titled_path = os.path.join(work_dir, "titled.mp4")
1327
  if create_text_overlay(current_video, titled_path, request.title_overlay):
1328
  current_video = titled_path
1329
 
 
1330
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1331
  final_filename = f"transcribed_{timestamp}.mp4"
1332
  final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
1333
 
 
1334
  transcript = " ".join([c.text for c in captions])
1335
 
 
1336
  shutil.rmtree(work_dir, ignore_errors=True)
1337
 
1338
  if final_url:
 
1363
  @app.post("/api/caption-from-srt", response_model=CaptionResponse)
1364
  async def add_captions_from_srt(request: SrtCaptionRequest):
1365
  """
1366
+ Add captions to video using SRT file from URL
1367
  """
1368
  try:
1369
  print(f"\n📝 Adding captions from SRT URL for project: {request.project_id}")
 
1371
  work_dir = f"/tmp/styling/{request.project_id}_{uuid.uuid4().hex[:8]}"
1372
  os.makedirs(work_dir, exist_ok=True)
1373
 
 
1374
  video_path = os.path.join(work_dir, "input.mp4")
1375
  download_file(request.video_url, video_path)
1376
 
 
1377
  srt_path = os.path.join(work_dir, "subtitles.srt")
1378
  try:
1379
  download_file(request.srt_url, srt_path)
 
1385
  error=f"Failed to download SRT file from URL: {str(e)}"
1386
  )
1387
 
 
1388
  with open(srt_path, 'r', encoding='utf-8') as f:
1389
  srt_content = f.read()
1390
 
 
1399
 
1400
  print(f"✅ Parsed {len(captions)} captions from SRT")
1401
 
 
1402
  font_path = get_font_path(request.caption_style.font_family)
1403
  if not font_path:
1404
  return CaptionResponse(
 
1407
  error=f"Caption font not found: {request.caption_style.font_family}"
1408
  )
1409
 
 
1410
  font_dir = os.path.dirname(font_path)
1411
  fc_config = f"""<?xml version="1.0"?>
1412
  <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
 
1419
  f.write(fc_config)
1420
  os.environ['FONTCONFIG_FILE'] = fc_file
1421
 
1422
+ ass_file = create_caption_ass(
1423
+ captions,
1424
+ request.caption_style,
1425
+ work_dir,
1426
+ font_path
1427
+ )
 
 
 
 
 
 
 
 
 
 
 
 
1428
 
1429
+ captioned_path = os.path.join(work_dir, "captioned.mp4")
 
1430
 
1431
+ cmd = [
1432
+ 'ffmpeg', '-y',
1433
+ '-i', video_path,
1434
+ '-vf', f"ass={ass_file}",
1435
+ '-c:a', 'copy',
1436
+ captioned_path
1437
+ ]
1438
+
1439
+ print(f"🎬 Applying captions from SRT...")
1440
+ result = subprocess.run(cmd, capture_output=True, text=True)
1441
+
1442
+ if result.returncode != 0:
1443
+ print(f"❌ FFmpeg error: {result.stderr}")
1444
+ return CaptionResponse(
1445
+ status="error",
1446
+ project_id=request.project_id,
1447
+ error=f"Failed to add captions: {result.stderr[:200]}"
1448
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1449
 
1450
+ current_video = captioned_path
 
1451
 
 
1452
  if request.title_overlay:
1453
  titled_path = os.path.join(work_dir, "titled.mp4")
1454
+ if create_text_overlay(current_video, titled_path, request.title_overlay):
1455
+ current_video = titled_path
1456
  else:
1457
  print("⚠️ Title overlay failed, continuing with captioned video")
1458
 
 
1459
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1460
  final_filename = f"captioned_from_srt_{timestamp}.mp4"
1461
+ final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
1462
 
 
1463
  shutil.rmtree(work_dir, ignore_errors=True)
1464
 
1465
  if final_url:
 
1490
  async def root():
1491
  return {
1492
  "name": "Text Styling API with Auto Caption",
1493
+ "version": "3.4.0",
1494
  "features": {
1495
  "title_overlay": "✅",
1496
  "manual_captions": "✅",
1497
  "auto_transcription": "✅" if WHISPER_AVAILABLE else "❌",
1498
  "srt_support": "✅",
1499
+ "enhanced_colors": f"✅ ({len(COLOR_MAP)} colors)",
1500
+ "text_outline": "✅"
1501
  },
1502
  "endpoints": {
1503
  "style": "POST /api/style - Add title overlay",