yukee1992 committed on
Commit
e748175
·
verified ·
1 Parent(s): 7a161cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +206 -302
app.py CHANGED
@@ -468,92 +468,6 @@ def wrap_text_for_ass(text: str, max_width: int, font_family: str, font_size: in
468
 
469
  return "\\N".join(lines)
470
 
471
-
472
- def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work_dir: str, font_path: str) -> str:
473
- """Create ASS subtitle file for captions with enhanced colors and outlines"""
474
-
475
- # Get actual font family name
476
- font_family_name = get_font_family_name(font_path)
477
- print(f"πŸ“ Caption font family: {font_family_name}")
478
-
479
- # Get font color and convert from RRGGBB to BBGGRR for ASS
480
- font_color_rgb = COLOR_MAP.get(style.color.lower(), "FFFFFF")
481
- font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
482
- print(f"🎨 Font color (text): {style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
483
-
484
- # Get outline color and convert to BGR
485
- outline_color_rgb = COLOR_MAP.get(style.outline_color.lower(), "000000")
486
- outline_color_bgr = outline_color_rgb[4:6] + outline_color_rgb[2:4] + outline_color_rgb[0:2]
487
- print(f"✏️ Outline color (border): {style.outline_color} -> RGB={outline_color_rgb} -> BGR={outline_color_bgr}, Width={style.outline_width}")
488
-
489
- # Parse background color with proper alpha calculation
490
- bg_parts = style.bg_color.split('@')
491
- bg_color_name = bg_parts[0].lower()
492
- bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
493
- bg_opacity = max(0, min(1, bg_opacity))
494
- bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
495
- bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
496
- bg_alpha = int((1 - bg_opacity) * 255)
497
- print(f"🎨 BG color (rectangle): {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
498
-
499
- # Map position to ASS alignment
500
- pos_map = {
501
- "bottom-left": 1, "bottom-center": 2, "bottom-right": 3,
502
- "left": 4, "center": 5, "right": 6,
503
- "top-left": 7, "top-center": 8, "top-right": 9
504
- }
505
- alignment = pos_map.get(style.position, 2)
506
-
507
- # Calculate margins
508
- margin_l = style.margin if alignment in [1,4,7] else 0
509
- margin_r = style.margin if alignment in [3,6,9] else 0
510
- margin_v = style.margin
511
-
512
- # Create ASS header with proper border settings
513
- # BorderStyle=3 gives solid box, Outline adds text border
514
- # Format: &HAABBGGRR where AA is alpha, BB blue, GG green, RR red
515
- ass_header = f"""[Script Info]
516
- ; Script generated by Video Styling Space - Auto Caption
517
- ScriptType: v4.00+
518
- PlayResX: 1920
519
- PlayResY: 1080
520
- ScaledBorderAndShadow: yes
521
- WrapStyle: 1
522
-
523
- [V4+ Styles]
524
- Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
525
- Style: Default,{font_family_name},{style.font_size},&H00{font_color_bgr},&H000000FF,&H00{outline_color_bgr},&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,3,{style.outline_width},0,{alignment},{margin_l},{margin_r},{margin_v},1
526
-
527
- [Events]
528
- Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
529
- """
530
-
531
- # Add caption events
532
- ass_events = []
533
- for i, caption in enumerate(captions):
534
- start = format_ass_time(caption.start_time)
535
- end = format_ass_time(caption.end_time)
536
-
537
- text = caption.text
538
- if style.max_width > 0:
539
- text = wrap_text_for_ass(text, style.max_width, font_family_name, style.font_size)
540
-
541
- ass_events.append(f"Dialogue: 0,{start},{end},Default,,0,0,0,,{text}")
542
-
543
- # Write ASS file
544
- ass_file = os.path.join(work_dir, "captions.ass")
545
- with open(ass_file, 'w', encoding='utf-8') as f:
546
- f.write(ass_header + "\n".join(ass_events))
547
-
548
- print("\n=== ASS FILE DEBUG ===")
549
- with open(ass_file, 'r', encoding='utf-8') as f:
550
- print(f.read())
551
- print("=== END ASS DEBUG ===\n")
552
-
553
- print(f"πŸ“ Created caption ASS file with {len(captions)} captions")
554
- print(f"πŸ“ Style line: BorderStyle=3, Outline={style.outline_width}, OutlineColor=&H00{outline_color_bgr}, BackColor=&H{bg_alpha:02X}{bg_color_bgr}")
555
- return ass_file
556
-
557
  # =============================================
558
  # SRT PARSING FUNCTIONS
559
  # =============================================
@@ -615,7 +529,7 @@ def srt_time_to_seconds(time_str: str) -> float:
615
  # =============================================
616
 
617
  def create_text_overlay(input_video, output_video, text_style):
618
- """Add text overlay using ASS subtitles with enhanced colors and outlines"""
619
  font_path = get_font_path(text_style.font_family)
620
  if not font_path:
621
  print(f"⚠️ Font not found: {text_style.font_family}")
@@ -623,116 +537,44 @@ def create_text_overlay(input_video, output_video, text_style):
623
 
624
  print(f"✅ Using font: {font_path}")
625
 
626
- # Get the actual font family name from the font file
627
- font_family_name = get_font_family_name(font_path)
628
- print(f"πŸ“ Font family name: {font_family_name}")
629
-
630
- # Create working directory for ASS file
631
- work_dir = os.path.dirname(output_video)
632
- ass_file = os.path.join(work_dir, "subtitle.ass")
633
-
634
- # Color mapping
635
- LOCAL_COLOR_MAP = {
636
- "white": "FFFFFF", "black": "000000", "red": "FF0000",
637
- "green": "00FF00", "blue": "0000FF", "yellow": "FFFF00",
638
- "gold": "FFD700", "purple": "800080", "magenta": "FF00FF",
639
- "cyan": "00FFFF", "orange": "FFA500", "pink": "FFC0CB"
640
- }
641
-
642
- # Get font color and convert from RRGGBB to BBGGRR for ASS
643
- font_color_rgb = LOCAL_COLOR_MAP.get(text_style.color.lower(), "FFFFFF")
644
- font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
645
- print(f"🎨 Font color (text): {text_style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
646
-
647
- # Get outline color and convert to BGR
648
- outline_color_rgb = LOCAL_COLOR_MAP.get(text_style.outline_color.lower(), "000000")
649
- outline_color_bgr = outline_color_rgb[4:6] + outline_color_rgb[2:4] + outline_color_rgb[0:2]
650
- print(f"✏️ Outline color (border): {text_style.outline_color} -> RGB={outline_color_rgb} -> BGR={outline_color_bgr}, Width={text_style.outline_width}")
651
-
652
  # Parse background color
653
  bg_parts = text_style.bg_color.split('@')
654
  bg_color_name = bg_parts[0].lower()
655
  bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
656
- bg_opacity = max(0, min(1, bg_opacity))
657
- bg_color_rgb = LOCAL_COLOR_MAP.get(bg_color_name, "000000")
658
- bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
659
- bg_alpha = int((1 - bg_opacity) * 255)
660
- print(f"🎨 BG color (rectangle): {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
661
 
662
- # Map position to ASS alignment
663
- pos_map = {
664
- "bottom-left": 1, "bottom-center": 2, "bottom-right": 3,
665
- "left": 4, "center": 5, "right": 6,
666
- "top-left": 7, "top-center": 8, "top-right": 9
 
 
 
 
 
 
 
667
  }
668
- alignment = pos_map.get(text_style.position, 5)
669
-
670
- # Calculate margins
671
- margin_l = text_style.margin if alignment in [1,4,7] else 0
672
- margin_r = text_style.margin if alignment in [3,6,9] else 0
673
- margin_v = text_style.margin
674
-
675
- # Create fontconfig configuration
676
- font_dir = os.path.dirname(font_path)
677
- fc_config = f"""<?xml version="1.0"?>
678
- <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
679
- <fontconfig>
680
- <dir>{font_dir}</dir>
681
- </fontconfig>"""
682
 
683
- fc_file = os.path.join(work_dir, "fonts.conf")
684
- with open(fc_file, 'w') as f:
685
- f.write(fc_config)
686
- os.environ['FONTCONFIG_FILE'] = fc_file
687
-
688
- # Create ASS file content with solid rectangle background and outline
689
- # BorderStyle=3 gives solid box, Outline adds text border
690
- # Format: &HAABBGGRR where AA is alpha, BB blue, GG green, RR red
691
- ass_content = f"""[Script Info]
692
- ; Script generated by Video Styling Space
693
- ScriptType: v4.00+
694
- PlayResX: 1920
695
- PlayResY: 1080
696
- ScaledBorderAndShadow: yes
697
-
698
- [V4+ Styles]
699
- Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
700
- Style: Default,{font_family_name},{text_style.font_size},&H00{font_color_bgr},&H000000FF,&H00{outline_color_bgr},&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,3,{text_style.outline_width},0,{alignment},{margin_l},{margin_r},{margin_v},1
701
-
702
- [Events]
703
- Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
704
- Dialogue: 0,0:00:00.00,0:00:10.00,Default,,0,0,0,,{text_style.text}"""
705
-
706
- # Write ASS file with UTF-8 encoding
707
- with open(ass_file, 'w', encoding='utf-8') as f:
708
- f.write(ass_content)
709
-
710
- # Debug: Print the ASS file content
711
- print("\n=== ASS FILE DEBUG ===")
712
- with open(ass_file, 'r', encoding='utf-8') as f:
713
- print(f.read())
714
- print("=== END ASS DEBUG ===\n")
715
-
716
- print(f"πŸ“ Created ASS subtitle file with font family: {font_family_name}")
717
- print(f"πŸ“ Style line: BorderStyle=3, Outline={text_style.outline_width}, OutlineColor=&H00{outline_color_bgr}, BackColor=&H{bg_alpha:02X}{bg_color_bgr}")
718
-
719
- # Run FFmpeg with ASS filter
720
- cmd = [
721
  'ffmpeg', '-y',
722
  '-i', input_video,
723
- '-vf', f"ass={ass_file}",
724
  '-c:a', 'copy',
725
  output_video
726
  ]
727
 
728
- print(f"🎬 Running FFmpeg with ASS filter...")
729
- result = subprocess.run(cmd, capture_output=True, text=True)
730
 
731
  if result.returncode == 0:
732
- print(f"✅ ASS method succeeded with background and outline")
733
  return True
734
  else:
735
- print(f"❌ ASS method failed: {result.stderr}")
736
  return False
737
 
738
  # =============================================
@@ -1141,6 +983,7 @@ async def add_captions_to_video(request: CaptionRequest):
1141
 
1142
  current_video = video_path
1143
 
 
1144
  font_path = get_font_path(request.caption_style.font_family)
1145
  if not font_path:
1146
  return CaptionResponse(
@@ -1149,58 +992,77 @@ async def add_captions_to_video(request: CaptionRequest):
1149
  error=f"Caption font not found: {request.caption_style.font_family}"
1150
  )
1151
 
1152
- font_dir = os.path.dirname(font_path)
1153
- fc_config = f"""<?xml version="1.0"?>
1154
- <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
1155
- <fontconfig>
1156
- <dir>{font_dir}</dir>
1157
- </fontconfig>"""
1158
-
1159
- fc_file = os.path.join(work_dir, "fonts.conf")
1160
- with open(fc_file, 'w') as f:
1161
- f.write(fc_config)
1162
- os.environ['FONTCONFIG_FILE'] = fc_file
1163
-
1164
- ass_file = create_caption_ass(
1165
- request.captions,
1166
- request.caption_style,
1167
- work_dir,
1168
- font_path
1169
- )
1170
-
1171
- captioned_path = os.path.join(work_dir, "captioned.mp4")
1172
-
1173
- cmd = [
1174
- 'ffmpeg', '-y',
1175
- '-i', current_video,
1176
- '-vf', f"ass={ass_file}",
1177
- '-c:a', 'copy',
1178
- captioned_path
1179
- ]
1180
 
1181
- print(f"🎬 Applying captions with FFmpeg...")
1182
- result = subprocess.run(cmd, capture_output=True, text=True)
1183
 
1184
- if result.returncode != 0:
1185
- print(f"❌ FFmpeg error: {result.stderr}")
1186
- return CaptionResponse(
1187
- status="error",
1188
- project_id=request.project_id,
1189
- error=f"Failed to add captions: {result.stderr[:200]}"
 
 
 
 
 
 
 
 
 
 
1190
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1191
 
1192
- current_video = captioned_path
1193
 
1194
  if request.title_overlay:
1195
  titled_path = os.path.join(work_dir, "titled.mp4")
1196
- if create_text_overlay(current_video, titled_path, request.title_overlay):
1197
- current_video = titled_path
1198
  else:
1199
  print("⚠️ Title overlay failed, continuing with captioned video")
1200
 
1201
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1202
  final_filename = f"captioned_{timestamp}.mp4"
1203
- final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
1204
 
1205
  shutil.rmtree(work_dir, ignore_errors=True)
1206
 
@@ -1274,6 +1136,7 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1274
  error="No captions generated from audio"
1275
  )
1276
 
 
1277
  font_path = get_font_path(request.caption_style.font_family)
1278
  if not font_path:
1279
  return TranscriptionResponse(
@@ -1282,56 +1145,75 @@ async def transcribe_and_caption(request: TranscriptionRequest):
1282
  error=f"Caption font not found: {request.caption_style.font_family}"
1283
  )
1284
 
1285
- font_dir = os.path.dirname(font_path)
1286
- fc_config = f"""<?xml version="1.0"?>
1287
- <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
1288
- <fontconfig>
1289
- <dir>{font_dir}</dir>
1290
- </fontconfig>"""
1291
-
1292
- fc_file = os.path.join(work_dir, "fonts.conf")
1293
- with open(fc_file, 'w') as f:
1294
- f.write(fc_config)
1295
- os.environ['FONTCONFIG_FILE'] = fc_file
1296
-
1297
- ass_file = create_caption_ass(
1298
- captions,
1299
- request.caption_style,
1300
- work_dir,
1301
- font_path
1302
- )
1303
-
1304
- captioned_path = os.path.join(work_dir, "captioned.mp4")
1305
-
1306
- cmd = [
1307
- 'ffmpeg', '-y',
1308
- '-i', video_path,
1309
- '-vf', f"ass={ass_file}",
1310
- '-c:a', 'copy',
1311
- captioned_path
1312
- ]
1313
 
1314
- print(f"🎬 Applying captions...")
1315
- result = subprocess.run(cmd, capture_output=True, text=True)
1316
 
1317
- if result.returncode != 0:
1318
- print(f"❌ FFmpeg error: {result.stderr}")
1319
- return TranscriptionResponse(
1320
- status="error",
1321
- project_id=request.project_id,
1322
- error=f"Failed to add captions: {result.stderr[:200]}"
 
 
 
 
 
 
 
 
 
 
1323
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1324
 
1325
- current_video = captioned_path
1326
 
1327
  if request.title_overlay:
1328
  titled_path = os.path.join(work_dir, "titled.mp4")
1329
- if create_text_overlay(current_video, titled_path, request.title_overlay):
1330
- current_video = titled_path
1331
 
1332
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1333
  final_filename = f"transcribed_{timestamp}.mp4"
1334
- final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
1335
 
1336
  transcript = " ".join([c.text for c in captions])
1337
 
@@ -1409,58 +1291,79 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
1409
  error=f"Caption font not found: {request.caption_style.font_family}"
1410
  )
1411
 
1412
- font_dir = os.path.dirname(font_path)
1413
- fc_config = f"""<?xml version="1.0"?>
1414
- <!DOCTYPE fontconfig SYSTEM "fonts.dtd">
1415
- <fontconfig>
1416
- <dir>{font_dir}</dir>
1417
- </fontconfig>"""
1418
-
1419
- fc_file = os.path.join(work_dir, "fonts.conf")
1420
- with open(fc_file, 'w') as f:
1421
- f.write(fc_config)
1422
- os.environ['FONTCONFIG_FILE'] = fc_file
1423
-
1424
- ass_file = create_caption_ass(
1425
- captions,
1426
- request.caption_style,
1427
- work_dir,
1428
- font_path
1429
- )
1430
-
1431
- captioned_path = os.path.join(work_dir, "captioned.mp4")
1432
-
1433
- cmd = [
1434
- 'ffmpeg', '-y',
1435
- '-i', video_path,
1436
- '-vf', f"ass={ass_file}",
1437
- '-c:a', 'copy',
1438
- captioned_path
1439
- ]
1440
 
1441
- print(f"🎬 Applying captions from SRT...")
1442
- result = subprocess.run(cmd, capture_output=True, text=True)
1443
 
1444
- if result.returncode != 0:
1445
- print(f"❌ FFmpeg error: {result.stderr}")
1446
- return CaptionResponse(
1447
- status="error",
1448
- project_id=request.project_id,
1449
- error=f"Failed to add captions: {result.stderr[:200]}"
 
 
 
 
 
 
 
 
 
 
1450
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1451
 
1452
- current_video = captioned_path
1453
 
1454
  if request.title_overlay:
1455
  titled_path = os.path.join(work_dir, "titled.mp4")
1456
- if create_text_overlay(current_video, titled_path, request.title_overlay):
1457
- current_video = titled_path
1458
  else:
1459
  print("⚠️ Title overlay failed, continuing with captioned video")
1460
 
1461
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1462
  final_filename = f"captioned_from_srt_{timestamp}.mp4"
1463
- final_url = upload_to_dataset(current_video, request.project_id, final_filename, "videos")
1464
 
1465
  shutil.rmtree(work_dir, ignore_errors=True)
1466
 
@@ -1492,7 +1395,7 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
1492
  async def root():
1493
  return {
1494
  "name": "Text Styling API with Auto Caption",
1495
- "version": "3.6.0",
1496
  "features": {
1497
  "title_overlay": "✅",
1498
  "manual_captions": "✅",
@@ -1500,7 +1403,8 @@ async def root():
1500
  "srt_support": "✅",
1501
  "enhanced_colors": f"✅ ({len(COLOR_MAP)} colors)",
1502
  "text_outline": "✅",
1503
- "background_boxes": "✅"
 
1504
  },
1505
  "endpoints": {
1506
  "style": "POST /api/style - Add title overlay",
 
468
 
469
  return "\\N".join(lines)
470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  # =============================================
472
  # SRT PARSING FUNCTIONS
473
  # =============================================
 
529
  # =============================================
530
 
531
  def create_text_overlay(input_video, output_video, text_style):
532
+ """Add text overlay using drawtext method with background box and outline"""
533
  font_path = get_font_path(text_style.font_family)
534
  if not font_path:
535
  print(f"⚠️ Font not found: {text_style.font_family}")
 
537
 
538
  print(f"✅ Using font: {font_path}")
539
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  # Parse background color
541
  bg_parts = text_style.bg_color.split('@')
542
  bg_color_name = bg_parts[0].lower()
543
  bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
 
 
 
 
 
544
 
545
+ # Map position to drawtext position with margin
546
+ margin = text_style.margin
547
+ drawtext_pos = {
548
+ "bottom-left": f"x={margin}:y=h-th-{margin}",
549
+ "bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
550
+ "bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
551
+ "center": "x=(w-tw)/2:y=(h-th)/2",
552
+ "left": f"x={margin}:y=(h-th)/2",
553
+ "right": f"x=w-tw-{margin}:y=(h-th)/2",
554
+ "top-left": f"x={margin}:y={margin}",
555
+ "top-center": f"x=(w-tw)/2:y={margin}",
556
+ "top-right": f"x=w-tw-{margin}:y={margin}"
557
  }
558
+ position = drawtext_pos.get(text_style.position, "x=(w-tw)/2:y=(h-th)/2")
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
+ # Build drawtext command with background box and outline
561
+ # borderw = outline width, bordercolor = outline color
562
+ drawtext_cmd = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
  'ffmpeg', '-y',
564
  '-i', input_video,
565
+ '-vf', f"drawtext=text='{text_style.text}':fontfile={font_path}:fontsize={text_style.font_size}:fontcolor={text_style.color}:{position}:box=1:boxcolor={bg_color_name}@{bg_opacity}:boxborderw={text_style.padding}:borderw={text_style.outline_width}:bordercolor={text_style.outline_color}",
566
  '-c:a', 'copy',
567
  output_video
568
  ]
569
 
570
+ print(f"🎬 Running drawtext command with outline={text_style.outline_width}, bordercolor={text_style.outline_color}")
571
+ result = subprocess.run(drawtext_cmd, capture_output=True, text=True)
572
 
573
  if result.returncode == 0:
574
+ print(f"✅ Drawtext method succeeded with background and outline")
575
  return True
576
  else:
577
+ print(f"❌ Drawtext failed: {result.stderr}")
578
  return False
579
 
580
  # =============================================
 
983
 
984
  current_video = video_path
985
 
986
+ # Get font path
987
  font_path = get_font_path(request.caption_style.font_family)
988
  if not font_path:
989
  return CaptionResponse(
 
992
  error=f"Caption font not found: {request.caption_style.font_family}"
993
  )
994
 
995
+ # Get margin value
996
+ margin = request.caption_style.margin
997
+
998
+ # Map position to drawtext position with margin
999
+ drawtext_pos = {
1000
+ "bottom-left": f"x={margin}:y=h-th-{margin}",
1001
+ "bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
1002
+ "bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
1003
+ "center": "x=(w-tw)/2:y=(h-th)/2",
1004
+ "left": f"x={margin}:y=(h-th)/2",
1005
+ "right": f"x=w-tw-{margin}:y=(h-th)/2",
1006
+ "top-left": f"x={margin}:y={margin}",
1007
+ "top-center": f"x=(w-tw)/2:y={margin}",
1008
+ "top-right": f"x=w-tw-{margin}:y={margin}"
1009
+ }
1010
+ position = drawtext_pos.get(request.caption_style.position, f"x=(w-tw)/2:y=h-th-{margin}")
 
 
 
 
 
 
 
 
 
 
 
 
1011
 
1012
+ # Process each caption with its own drawtext filter
1013
+ current_input = video_path
1014
 
1015
+ for i, caption in enumerate(request.captions):
1016
+ temp_output = os.path.join(work_dir, f"step_{i:03d}.mp4")
1017
+
1018
+ # Build drawtext filter with background box and outline
1019
+ drawtext_filter = (
1020
+ f"drawtext=text='{caption.text}':"
1021
+ f"fontfile={font_path}:"
1022
+ f"fontsize={request.caption_style.font_size}:"
1023
+ f"fontcolor={request.caption_style.color}:"
1024
+ f"{position}:"
1025
+ f"box=1:"
1026
+ f"boxcolor={request.caption_style.bg_color}:"
1027
+ f"boxborderw={request.caption_style.padding}:"
1028
+ f"borderw={request.caption_style.outline_width}:"
1029
+ f"bordercolor={request.caption_style.outline_color}:"
1030
+ f"enable='between(t,{caption.start_time},{caption.end_time})'"
1031
  )
1032
+
1033
+ cmd = [
1034
+ 'ffmpeg', '-y',
1035
+ '-i', current_input,
1036
+ '-vf', drawtext_filter,
1037
+ '-c:a', 'copy',
1038
+ temp_output
1039
+ ]
1040
+
1041
+ print(f"🎬 Adding caption {i+1}/{len(request.captions)}: '{caption.text}' with outline={request.caption_style.outline_width}")
1042
+ result = subprocess.run(cmd, capture_output=True, text=True)
1043
+
1044
+ if result.returncode != 0:
1045
+ print(f"❌ FFmpeg error at caption {i+1}: {result.stderr}")
1046
+ return CaptionResponse(
1047
+ status="error",
1048
+ project_id=request.project_id,
1049
+ error=f"Failed to add caption {i+1}: {result.stderr[:200]}"
1050
+ )
1051
+
1052
+ current_input = temp_output
1053
 
1054
+ final_video = current_input
1055
 
1056
  if request.title_overlay:
1057
  titled_path = os.path.join(work_dir, "titled.mp4")
1058
+ if create_text_overlay(final_video, titled_path, request.title_overlay):
1059
+ final_video = titled_path
1060
  else:
1061
  print("⚠️ Title overlay failed, continuing with captioned video")
1062
 
1063
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1064
  final_filename = f"captioned_{timestamp}.mp4"
1065
+ final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
1066
 
1067
  shutil.rmtree(work_dir, ignore_errors=True)
1068
 
 
1136
  error="No captions generated from audio"
1137
  )
1138
 
1139
+ # Get font path
1140
  font_path = get_font_path(request.caption_style.font_family)
1141
  if not font_path:
1142
  return TranscriptionResponse(
 
1145
  error=f"Caption font not found: {request.caption_style.font_family}"
1146
  )
1147
 
1148
+ # Get margin value
1149
+ margin = request.caption_style.margin
1150
+
1151
+ # Map position to drawtext position with margin
1152
+ drawtext_pos = {
1153
+ "bottom-left": f"x={margin}:y=h-th-{margin}",
1154
+ "bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
1155
+ "bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
1156
+ "center": "x=(w-tw)/2:y=(h-th)/2",
1157
+ "left": f"x={margin}:y=(h-th)/2",
1158
+ "right": f"x=w-tw-{margin}:y=(h-th)/2",
1159
+ "top-left": f"x={margin}:y={margin}",
1160
+ "top-center": f"x=(w-tw)/2:y={margin}",
1161
+ "top-right": f"x=w-tw-{margin}:y={margin}"
1162
+ }
1163
+ position = drawtext_pos.get(request.caption_style.position, f"x=(w-tw)/2:y=h-th-{margin}")
 
 
 
 
 
 
 
 
 
 
 
 
1164
 
1165
+ # Process each caption with its own drawtext filter
1166
+ current_input = video_path
1167
 
1168
+ for i, caption in enumerate(captions):
1169
+ temp_output = os.path.join(work_dir, f"step_{i:03d}.mp4")
1170
+
1171
+ # Build drawtext filter with background box and outline
1172
+ drawtext_filter = (
1173
+ f"drawtext=text='{caption.text}':"
1174
+ f"fontfile={font_path}:"
1175
+ f"fontsize={request.caption_style.font_size}:"
1176
+ f"fontcolor={request.caption_style.color}:"
1177
+ f"{position}:"
1178
+ f"box=1:"
1179
+ f"boxcolor={request.caption_style.bg_color}:"
1180
+ f"boxborderw={request.caption_style.padding}:"
1181
+ f"borderw={request.caption_style.outline_width}:"
1182
+ f"bordercolor={request.caption_style.outline_color}:"
1183
+ f"enable='between(t,{caption.start_time},{caption.end_time})'"
1184
  )
1185
+
1186
+ cmd = [
1187
+ 'ffmpeg', '-y',
1188
+ '-i', current_input,
1189
+ '-vf', drawtext_filter,
1190
+ '-c:a', 'copy',
1191
+ temp_output
1192
+ ]
1193
+
1194
+ print(f"🎬 Adding caption {i+1}/{len(captions)}: '{caption.text}'")
1195
+ result = subprocess.run(cmd, capture_output=True, text=True)
1196
+
1197
+ if result.returncode != 0:
1198
+ print(f"❌ FFmpeg error at caption {i+1}: {result.stderr}")
1199
+ return TranscriptionResponse(
1200
+ status="error",
1201
+ project_id=request.project_id,
1202
+ error=f"Failed to add caption {i+1}: {result.stderr[:200]}"
1203
+ )
1204
+
1205
+ current_input = temp_output
1206
 
1207
+ final_video = current_input
1208
 
1209
  if request.title_overlay:
1210
  titled_path = os.path.join(work_dir, "titled.mp4")
1211
+ if create_text_overlay(final_video, titled_path, request.title_overlay):
1212
+ final_video = titled_path
1213
 
1214
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1215
  final_filename = f"transcribed_{timestamp}.mp4"
1216
+ final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
1217
 
1218
  transcript = " ".join([c.text for c in captions])
1219
 
 
1291
  error=f"Caption font not found: {request.caption_style.font_family}"
1292
  )
1293
 
1294
+ # Get margin value
1295
+ margin = request.caption_style.margin
1296
+ print(f"πŸ“ Using margin: {margin} pixels")
1297
+
1298
+ # Map position to drawtext position with margin
1299
+ drawtext_pos = {
1300
+ "bottom-left": f"x={margin}:y=h-th-{margin}",
1301
+ "bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
1302
+ "bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
1303
+ "center": "x=(w-tw)/2:y=(h-th)/2",
1304
+ "left": f"x={margin}:y=(h-th)/2",
1305
+ "right": f"x=w-tw-{margin}:y=(h-th)/2",
1306
+ "top-left": f"x={margin}:y={margin}",
1307
+ "top-center": f"x=(w-tw)/2:y={margin}",
1308
+ "top-right": f"x=w-tw-{margin}:y={margin}"
1309
+ }
1310
+ position = drawtext_pos.get(request.caption_style.position, f"x=(w-tw)/2:y=h-th-{margin}")
1311
+ print(f"πŸ“ Position string: {position}")
 
 
 
 
 
 
 
 
 
 
1312
 
1313
+ # Process each caption with its own drawtext filter
1314
+ current_input = video_path
1315
 
1316
+ for i, caption in enumerate(captions):
1317
+ temp_output = os.path.join(work_dir, f"step_{i:03d}.mp4")
1318
+
1319
+ # Build drawtext filter with background box and outline
1320
+ drawtext_filter = (
1321
+ f"drawtext=text='{caption.text}':"
1322
+ f"fontfile={font_path}:"
1323
+ f"fontsize={request.caption_style.font_size}:"
1324
+ f"fontcolor={request.caption_style.color}:"
1325
+ f"{position}:"
1326
+ f"box=1:"
1327
+ f"boxcolor={request.caption_style.bg_color}:"
1328
+ f"boxborderw={request.caption_style.padding}:"
1329
+ f"borderw={request.caption_style.outline_width}:"
1330
+ f"bordercolor={request.caption_style.outline_color}:"
1331
+ f"enable='between(t,{caption.start_time},{caption.end_time})'"
1332
  )
1333
+
1334
+ cmd = [
1335
+ 'ffmpeg', '-y',
1336
+ '-i', current_input,
1337
+ '-vf', drawtext_filter,
1338
+ '-c:a', 'copy',
1339
+ temp_output
1340
+ ]
1341
+
1342
+ print(f"🎬 Adding caption {i+1}/{len(captions)}: '{caption.text}' at margin {margin} with outline={request.caption_style.outline_width}")
1343
+ result = subprocess.run(cmd, capture_output=True, text=True)
1344
+
1345
+ if result.returncode != 0:
1346
+ print(f"❌ FFmpeg error at caption {i+1}: {result.stderr}")
1347
+ return CaptionResponse(
1348
+ status="error",
1349
+ project_id=request.project_id,
1350
+ error=f"Failed to add caption {i+1}: {result.stderr[:200]}"
1351
+ )
1352
+
1353
+ current_input = temp_output
1354
 
1355
+ final_video = current_input
1356
 
1357
  if request.title_overlay:
1358
  titled_path = os.path.join(work_dir, "titled.mp4")
1359
+ if create_text_overlay(final_video, titled_path, request.title_overlay):
1360
+ final_video = titled_path
1361
  else:
1362
  print("⚠️ Title overlay failed, continuing with captioned video")
1363
 
1364
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1365
  final_filename = f"captioned_from_srt_{timestamp}.mp4"
1366
+ final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
1367
 
1368
  shutil.rmtree(work_dir, ignore_errors=True)
1369
 
 
1395
  async def root():
1396
  return {
1397
  "name": "Text Styling API with Auto Caption",
1398
+ "version": "4.0.0",
1399
  "features": {
1400
  "title_overlay": "✅",
1401
  "manual_captions": "✅",
 
1403
  "srt_support": "✅",
1404
  "enhanced_colors": f"✅ ({len(COLOR_MAP)} colors)",
1405
  "text_outline": "✅",
1406
+ "background_boxes": "✅",
1407
+ "margin_control": "✅"
1408
  },
1409
  "endpoints": {
1410
  "style": "POST /api/style - Add title overlay",