Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -468,92 +468,6 @@ def wrap_text_for_ass(text: str, max_width: int, font_family: str, font_size: in
|
|
| 468 |
|
| 469 |
return "\\N".join(lines)
|
| 470 |
|
| 471 |
-
|
| 472 |
-
def _rgb_hex_to_ass_bgr(rgb_hex: str) -> str:
    """Reorder an ``RRGGBB`` hex string into the ``BBGGRR`` order used by ASS colours."""
    return rgb_hex[4:6] + rgb_hex[2:4] + rgb_hex[0:2]


def _parse_bg_color_spec(bg_spec: str, default_opacity: float = 0.5):
    """Split a ``name@opacity`` spec into ``(lowercased name, opacity clamped to [0, 1])``."""
    parts = bg_spec.split('@')
    color_name = parts[0].lower()
    opacity = default_opacity
    if len(parts) > 1:
        try:
            opacity = float(parts[1])
        except ValueError:
            # A malformed opacity should not abort caption rendering; keep the default.
            print(f"Invalid background opacity {parts[1]!r}, using {default_opacity}")
    return color_name, max(0, min(1, opacity))


def create_caption_ass(captions: List[CaptionSegment], style: CaptionStyle, work_dir: str, font_path: str) -> str:
    """Create an ASS subtitle file for the given captions and return its path.

    The file is written to ``<work_dir>/captions.ass`` with a single ``Default``
    style built from ``style`` (font size, text/outline/background colours,
    outline width, position and margin) and one ``Dialogue`` event per caption.

    Args:
        captions: Caption segments; each must expose ``start_time``,
            ``end_time`` and ``text``.
        style: Caption styling options (``color``, ``outline_color``,
            ``outline_width``, ``bg_color`` as ``name`` or ``name@opacity``,
            ``position``, ``margin``, ``font_size``, ``max_width``).
        work_dir: Directory in which ``captions.ass`` is created.
        font_path: Path of the font file; its family name is resolved with
            ``get_font_family_name`` and used as the ASS ``Fontname``.

    Returns:
        The path of the written ASS file.
    """
    # Get actual font family name
    font_family_name = get_font_family_name(font_path)
    print(f"π Caption font family: {font_family_name}")

    # Text colour: look up the RRGGBB value (white if unknown) and flip it to BBGGRR
    font_color_rgb = COLOR_MAP.get(style.color.lower(), "FFFFFF")
    font_color_bgr = _rgb_hex_to_ass_bgr(font_color_rgb)
    print(f"π¨ Font color (text): {style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")

    # Outline colour: same conversion, black if unknown
    outline_color_rgb = COLOR_MAP.get(style.outline_color.lower(), "000000")
    outline_color_bgr = _rgb_hex_to_ass_bgr(outline_color_rgb)
    print(f"βοΈ Outline color (border): {style.outline_color} -> RGB={outline_color_rgb} -> BGR={outline_color_bgr}, Width={style.outline_width}")

    # Background colour plus opacity; the alpha byte is inverted (opacity 1 -> alpha 0)
    bg_color_name, bg_opacity = _parse_bg_color_spec(style.bg_color)
    bg_color_rgb = COLOR_MAP.get(bg_color_name, "000000")
    bg_color_bgr = _rgb_hex_to_ass_bgr(bg_color_rgb)
    bg_alpha = int((1 - bg_opacity) * 255)
    print(f"π¨ BG color (rectangle): {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")

    # Map position to ASS alignment (numpad layout); unknown positions use bottom-center
    pos_map = {
        "bottom-left": 1, "bottom-center": 2, "bottom-right": 3,
        "left": 4, "center": 5, "right": 6,
        "top-left": 7, "top-center": 8, "top-right": 9,
    }
    alignment = pos_map.get(style.position, 2)

    # Horizontal margins only apply on the side the text is anchored to
    margin_l = style.margin if alignment in (1, 4, 7) else 0
    margin_r = style.margin if alignment in (3, 6, 9) else 0
    margin_v = style.margin

    # BorderStyle=3 gives solid box, Outline adds text border
    # Colour format: &HAABBGGRR where AA is alpha, BB blue, GG green, RR red
    style_line = (
        f"Style: Default,{font_family_name},{style.font_size},"
        f"&H00{font_color_bgr},&H000000FF,&H00{outline_color_bgr},&H{bg_alpha:02X}{bg_color_bgr},"
        f"0,0,0,0,100,100,0,0,3,{style.outline_width},0,"
        f"{alignment},{margin_l},{margin_r},{margin_v},1"
    )
    # Built line by line so no source indentation leaks into the ASS file
    ass_header = "\n".join([
        "[Script Info]",
        "; Script generated by Video Styling Space - Auto Caption",
        "ScriptType: v4.00+",
        "PlayResX: 1920",
        "PlayResY: 1080",
        "ScaledBorderAndShadow: yes",
        "WrapStyle: 1",
        "",
        "[V4+ Styles]",
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
        style_line,
        "",
        "[Events]",
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
    ]) + "\n"

    # Add caption events
    ass_events = []
    for caption in captions:
        start = format_ass_time(caption.start_time)
        end = format_ass_time(caption.end_time)

        text = caption.text
        if style.max_width > 0:
            text = wrap_text_for_ass(text, style.max_width, font_family_name, style.font_size)

        # An ASS event must stay on one physical line: turn any raw line break
        # left in the text into the ASS hard break so the event is not cut short.
        text = text.replace("\r\n", "\\N").replace("\r", "\\N").replace("\n", "\\N")

        ass_events.append(f"Dialogue: 0,{start},{end},Default,,0,0,0,,{text}")

    ass_content = ass_header + "\n".join(ass_events)

    # Write ASS file
    ass_file = os.path.join(work_dir, "captions.ass")
    with open(ass_file, 'w', encoding='utf-8') as f:
        f.write(ass_content)

    # Debug dump of exactly what was written (no need to re-read the file)
    print("\n=== ASS FILE DEBUG ===")
    print(ass_content)
    print("=== END ASS DEBUG ===\n")

    print(f"π Created caption ASS file with {len(captions)} captions")
    print(f"π Style line: BorderStyle=3, Outline={style.outline_width}, OutlineColor=&H00{outline_color_bgr}, BackColor=&H{bg_alpha:02X}{bg_color_bgr}")
    return ass_file
|
| 556 |
-
|
| 557 |
# =============================================
|
| 558 |
# SRT PARSING FUNCTIONS
|
| 559 |
# =============================================
|
|
@@ -615,7 +529,7 @@ def srt_time_to_seconds(time_str: str) -> float:
|
|
| 615 |
# =============================================
|
| 616 |
|
| 617 |
def create_text_overlay(input_video, output_video, text_style):
|
| 618 |
-
"""Add text overlay using
|
| 619 |
font_path = get_font_path(text_style.font_family)
|
| 620 |
if not font_path:
|
| 621 |
print(f"β οΈ Font not found: {text_style.font_family}")
|
|
@@ -623,116 +537,44 @@ def create_text_overlay(input_video, output_video, text_style):
|
|
| 623 |
|
| 624 |
print(f"β
Using font: {font_path}")
|
| 625 |
|
| 626 |
-
# Get the actual font family name from the font file
|
| 627 |
-
font_family_name = get_font_family_name(font_path)
|
| 628 |
-
print(f"π Font family name: {font_family_name}")
|
| 629 |
-
|
| 630 |
-
# Create working directory for ASS file
|
| 631 |
-
work_dir = os.path.dirname(output_video)
|
| 632 |
-
ass_file = os.path.join(work_dir, "subtitle.ass")
|
| 633 |
-
|
| 634 |
-
# Color mapping
|
| 635 |
-
LOCAL_COLOR_MAP = {
|
| 636 |
-
"white": "FFFFFF", "black": "000000", "red": "FF0000",
|
| 637 |
-
"green": "00FF00", "blue": "0000FF", "yellow": "FFFF00",
|
| 638 |
-
"gold": "FFD700", "purple": "800080", "magenta": "FF00FF",
|
| 639 |
-
"cyan": "00FFFF", "orange": "FFA500", "pink": "FFC0CB"
|
| 640 |
-
}
|
| 641 |
-
|
| 642 |
-
# Get font color and convert from RRGGBB to BBGGRR for ASS
|
| 643 |
-
font_color_rgb = LOCAL_COLOR_MAP.get(text_style.color.lower(), "FFFFFF")
|
| 644 |
-
font_color_bgr = font_color_rgb[4:6] + font_color_rgb[2:4] + font_color_rgb[0:2]
|
| 645 |
-
print(f"π¨ Font color (text): {text_style.color} -> RGB={font_color_rgb} -> BGR={font_color_bgr}")
|
| 646 |
-
|
| 647 |
-
# Get outline color and convert to BGR
|
| 648 |
-
outline_color_rgb = LOCAL_COLOR_MAP.get(text_style.outline_color.lower(), "000000")
|
| 649 |
-
outline_color_bgr = outline_color_rgb[4:6] + outline_color_rgb[2:4] + outline_color_rgb[0:2]
|
| 650 |
-
print(f"βοΈ Outline color (border): {text_style.outline_color} -> RGB={outline_color_rgb} -> BGR={outline_color_bgr}, Width={text_style.outline_width}")
|
| 651 |
-
|
| 652 |
# Parse background color
|
| 653 |
bg_parts = text_style.bg_color.split('@')
|
| 654 |
bg_color_name = bg_parts[0].lower()
|
| 655 |
bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
|
| 656 |
-
bg_opacity = max(0, min(1, bg_opacity))
|
| 657 |
-
bg_color_rgb = LOCAL_COLOR_MAP.get(bg_color_name, "000000")
|
| 658 |
-
bg_color_bgr = bg_color_rgb[4:6] + bg_color_rgb[2:4] + bg_color_rgb[0:2]
|
| 659 |
-
bg_alpha = int((1 - bg_opacity) * 255)
|
| 660 |
-
print(f"π¨ BG color (rectangle): {bg_color_name} -> RGB={bg_color_rgb} -> BGR={bg_color_bgr}, Opacity={bg_opacity}, Alpha={bg_alpha}")
|
| 661 |
|
| 662 |
-
# Map position to
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
"left":
|
| 666 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
}
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
# Calculate margins
|
| 671 |
-
margin_l = text_style.margin if alignment in [1,4,7] else 0
|
| 672 |
-
margin_r = text_style.margin if alignment in [3,6,9] else 0
|
| 673 |
-
margin_v = text_style.margin
|
| 674 |
-
|
| 675 |
-
# Create fontconfig configuration
|
| 676 |
-
font_dir = os.path.dirname(font_path)
|
| 677 |
-
fc_config = f"""<?xml version="1.0"?>
|
| 678 |
-
<!DOCTYPE fontconfig SYSTEM "fonts.dtd">
|
| 679 |
-
<fontconfig>
|
| 680 |
-
<dir>{font_dir}</dir>
|
| 681 |
-
</fontconfig>"""
|
| 682 |
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
os.environ['FONTCONFIG_FILE'] = fc_file
|
| 687 |
-
|
| 688 |
-
# Create ASS file content with solid rectangle background and outline
|
| 689 |
-
# BorderStyle=3 gives solid box, Outline adds text border
|
| 690 |
-
# Format: &HAABBGGRR where AA is alpha, BB blue, GG green, RR red
|
| 691 |
-
ass_content = f"""[Script Info]
|
| 692 |
-
; Script generated by Video Styling Space
|
| 693 |
-
ScriptType: v4.00+
|
| 694 |
-
PlayResX: 1920
|
| 695 |
-
PlayResY: 1080
|
| 696 |
-
ScaledBorderAndShadow: yes
|
| 697 |
-
|
| 698 |
-
[V4+ Styles]
|
| 699 |
-
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
| 700 |
-
Style: Default,{font_family_name},{text_style.font_size},&H00{font_color_bgr},&H000000FF,&H00{outline_color_bgr},&H{bg_alpha:02X}{bg_color_bgr},0,0,0,0,100,100,0,0,3,{text_style.outline_width},0,{alignment},{margin_l},{margin_r},{margin_v},1
|
| 701 |
-
|
| 702 |
-
[Events]
|
| 703 |
-
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
| 704 |
-
Dialogue: 0,0:00:00.00,0:00:10.00,Default,,0,0,0,,{text_style.text}"""
|
| 705 |
-
|
| 706 |
-
# Write ASS file with UTF-8 encoding
|
| 707 |
-
with open(ass_file, 'w', encoding='utf-8') as f:
|
| 708 |
-
f.write(ass_content)
|
| 709 |
-
|
| 710 |
-
# Debug: Print the ASS file content
|
| 711 |
-
print("\n=== ASS FILE DEBUG ===")
|
| 712 |
-
with open(ass_file, 'r', encoding='utf-8') as f:
|
| 713 |
-
print(f.read())
|
| 714 |
-
print("=== END ASS DEBUG ===\n")
|
| 715 |
-
|
| 716 |
-
print(f"π Created ASS subtitle file with font family: {font_family_name}")
|
| 717 |
-
print(f"π Style line: BorderStyle=3, Outline={text_style.outline_width}, OutlineColor=&H00{outline_color_bgr}, BackColor=&H{bg_alpha:02X}{bg_color_bgr}")
|
| 718 |
-
|
| 719 |
-
# Run FFmpeg with ASS filter
|
| 720 |
-
cmd = [
|
| 721 |
'ffmpeg', '-y',
|
| 722 |
'-i', input_video,
|
| 723 |
-
'-vf', f"
|
| 724 |
'-c:a', 'copy',
|
| 725 |
output_video
|
| 726 |
]
|
| 727 |
|
| 728 |
-
print(f"π¬ Running
|
| 729 |
-
result = subprocess.run(
|
| 730 |
|
| 731 |
if result.returncode == 0:
|
| 732 |
-
print(f"β
|
| 733 |
return True
|
| 734 |
else:
|
| 735 |
-
print(f"β
|
| 736 |
return False
|
| 737 |
|
| 738 |
# =============================================
|
|
@@ -1141,6 +983,7 @@ async def add_captions_to_video(request: CaptionRequest):
|
|
| 1141 |
|
| 1142 |
current_video = video_path
|
| 1143 |
|
|
|
|
| 1144 |
font_path = get_font_path(request.caption_style.font_family)
|
| 1145 |
if not font_path:
|
| 1146 |
return CaptionResponse(
|
|
@@ -1149,58 +992,77 @@ async def add_captions_to_video(request: CaptionRequest):
|
|
| 1149 |
error=f"Caption font not found: {request.caption_style.font_family}"
|
| 1150 |
)
|
| 1151 |
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
-
f
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
font_path
|
| 1169 |
-
)
|
| 1170 |
-
|
| 1171 |
-
captioned_path = os.path.join(work_dir, "captioned.mp4")
|
| 1172 |
-
|
| 1173 |
-
cmd = [
|
| 1174 |
-
'ffmpeg', '-y',
|
| 1175 |
-
'-i', current_video,
|
| 1176 |
-
'-vf', f"ass={ass_file}",
|
| 1177 |
-
'-c:a', 'copy',
|
| 1178 |
-
captioned_path
|
| 1179 |
-
]
|
| 1180 |
|
| 1181 |
-
|
| 1182 |
-
|
| 1183 |
|
| 1184 |
-
|
| 1185 |
-
|
| 1186 |
-
|
| 1187 |
-
|
| 1188 |
-
|
| 1189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1190 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1191 |
|
| 1192 |
-
|
| 1193 |
|
| 1194 |
if request.title_overlay:
|
| 1195 |
titled_path = os.path.join(work_dir, "titled.mp4")
|
| 1196 |
-
if create_text_overlay(
|
| 1197 |
-
|
| 1198 |
else:
|
| 1199 |
print("β οΈ Title overlay failed, continuing with captioned video")
|
| 1200 |
|
| 1201 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1202 |
final_filename = f"captioned_{timestamp}.mp4"
|
| 1203 |
-
final_url = upload_to_dataset(
|
| 1204 |
|
| 1205 |
shutil.rmtree(work_dir, ignore_errors=True)
|
| 1206 |
|
|
@@ -1274,6 +1136,7 @@ async def transcribe_and_caption(request: TranscriptionRequest):
|
|
| 1274 |
error="No captions generated from audio"
|
| 1275 |
)
|
| 1276 |
|
|
|
|
| 1277 |
font_path = get_font_path(request.caption_style.font_family)
|
| 1278 |
if not font_path:
|
| 1279 |
return TranscriptionResponse(
|
|
@@ -1282,56 +1145,75 @@ async def transcribe_and_caption(request: TranscriptionRequest):
|
|
| 1282 |
error=f"Caption font not found: {request.caption_style.font_family}"
|
| 1283 |
)
|
| 1284 |
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
|
| 1291 |
-
|
| 1292 |
-
|
| 1293 |
-
|
| 1294 |
-
f
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
-
|
| 1299 |
-
|
| 1300 |
-
|
| 1301 |
-
font_path
|
| 1302 |
-
)
|
| 1303 |
-
|
| 1304 |
-
captioned_path = os.path.join(work_dir, "captioned.mp4")
|
| 1305 |
-
|
| 1306 |
-
cmd = [
|
| 1307 |
-
'ffmpeg', '-y',
|
| 1308 |
-
'-i', video_path,
|
| 1309 |
-
'-vf', f"ass={ass_file}",
|
| 1310 |
-
'-c:a', 'copy',
|
| 1311 |
-
captioned_path
|
| 1312 |
-
]
|
| 1313 |
|
| 1314 |
-
|
| 1315 |
-
|
| 1316 |
|
| 1317 |
-
|
| 1318 |
-
|
| 1319 |
-
|
| 1320 |
-
|
| 1321 |
-
|
| 1322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1323 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1324 |
|
| 1325 |
-
|
| 1326 |
|
| 1327 |
if request.title_overlay:
|
| 1328 |
titled_path = os.path.join(work_dir, "titled.mp4")
|
| 1329 |
-
if create_text_overlay(
|
| 1330 |
-
|
| 1331 |
|
| 1332 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1333 |
final_filename = f"transcribed_{timestamp}.mp4"
|
| 1334 |
-
final_url = upload_to_dataset(
|
| 1335 |
|
| 1336 |
transcript = " ".join([c.text for c in captions])
|
| 1337 |
|
|
@@ -1409,58 +1291,79 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
|
|
| 1409 |
error=f"Caption font not found: {request.caption_style.font_family}"
|
| 1410 |
)
|
| 1411 |
|
| 1412 |
-
|
| 1413 |
-
|
| 1414 |
-
|
| 1415 |
-
|
| 1416 |
-
|
| 1417 |
-
|
| 1418 |
-
|
| 1419 |
-
|
| 1420 |
-
|
| 1421 |
-
|
| 1422 |
-
|
| 1423 |
-
|
| 1424 |
-
|
| 1425 |
-
|
| 1426 |
-
|
| 1427 |
-
|
| 1428 |
-
|
| 1429 |
-
)
|
| 1430 |
-
|
| 1431 |
-
captioned_path = os.path.join(work_dir, "captioned.mp4")
|
| 1432 |
-
|
| 1433 |
-
cmd = [
|
| 1434 |
-
'ffmpeg', '-y',
|
| 1435 |
-
'-i', video_path,
|
| 1436 |
-
'-vf', f"ass={ass_file}",
|
| 1437 |
-
'-c:a', 'copy',
|
| 1438 |
-
captioned_path
|
| 1439 |
-
]
|
| 1440 |
|
| 1441 |
-
|
| 1442 |
-
|
| 1443 |
|
| 1444 |
-
|
| 1445 |
-
|
| 1446 |
-
|
| 1447 |
-
|
| 1448 |
-
|
| 1449 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1450 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1451 |
|
| 1452 |
-
|
| 1453 |
|
| 1454 |
if request.title_overlay:
|
| 1455 |
titled_path = os.path.join(work_dir, "titled.mp4")
|
| 1456 |
-
if create_text_overlay(
|
| 1457 |
-
|
| 1458 |
else:
|
| 1459 |
print("β οΈ Title overlay failed, continuing with captioned video")
|
| 1460 |
|
| 1461 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1462 |
final_filename = f"captioned_from_srt_{timestamp}.mp4"
|
| 1463 |
-
final_url = upload_to_dataset(
|
| 1464 |
|
| 1465 |
shutil.rmtree(work_dir, ignore_errors=True)
|
| 1466 |
|
|
@@ -1492,7 +1395,7 @@ async def add_captions_from_srt(request: SrtCaptionRequest):
|
|
| 1492 |
async def root():
|
| 1493 |
return {
|
| 1494 |
"name": "Text Styling API with Auto Caption",
|
| 1495 |
-
"version": "
|
| 1496 |
"features": {
|
| 1497 |
"title_overlay": "β
",
|
| 1498 |
"manual_captions": "β
",
|
|
@@ -1500,7 +1403,8 @@ async def root():
|
|
| 1500 |
"srt_support": "β
",
|
| 1501 |
"enhanced_colors": f"β
({len(COLOR_MAP)} colors)",
|
| 1502 |
"text_outline": "β
",
|
| 1503 |
-
"background_boxes": "β
"
|
|
|
|
| 1504 |
},
|
| 1505 |
"endpoints": {
|
| 1506 |
"style": "POST /api/style - Add title overlay",
|
|
|
|
| 468 |
|
| 469 |
return "\\N".join(lines)
|
| 470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
# =============================================
|
| 472 |
# SRT PARSING FUNCTIONS
|
| 473 |
# =============================================
|
|
|
|
| 529 |
# =============================================
|
| 530 |
|
| 531 |
def create_text_overlay(input_video, output_video, text_style):
|
| 532 |
+
"""Add text overlay using drawtext method with background box and outline"""
|
| 533 |
font_path = get_font_path(text_style.font_family)
|
| 534 |
if not font_path:
|
| 535 |
print(f"β οΈ Font not found: {text_style.font_family}")
|
|
|
|
| 537 |
|
| 538 |
print(f"β
Using font: {font_path}")
|
| 539 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
# Parse background color
|
| 541 |
bg_parts = text_style.bg_color.split('@')
|
| 542 |
bg_color_name = bg_parts[0].lower()
|
| 543 |
bg_opacity = float(bg_parts[1]) if len(bg_parts) > 1 else 0.5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
|
| 545 |
+
# Map position to drawtext position with margin
|
| 546 |
+
margin = text_style.margin
|
| 547 |
+
drawtext_pos = {
|
| 548 |
+
"bottom-left": f"x={margin}:y=h-th-{margin}",
|
| 549 |
+
"bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
|
| 550 |
+
"bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
|
| 551 |
+
"center": "x=(w-tw)/2:y=(h-th)/2",
|
| 552 |
+
"left": f"x={margin}:y=(h-th)/2",
|
| 553 |
+
"right": f"x=w-tw-{margin}:y=(h-th)/2",
|
| 554 |
+
"top-left": f"x={margin}:y={margin}",
|
| 555 |
+
"top-center": f"x=(w-tw)/2:y={margin}",
|
| 556 |
+
"top-right": f"x=w-tw-{margin}:y={margin}"
|
| 557 |
}
|
| 558 |
+
position = drawtext_pos.get(text_style.position, "x=(w-tw)/2:y=(h-th)/2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
|
| 560 |
+
# Build drawtext command with background box and outline
|
| 561 |
+
# borderw = outline width, bordercolor = outline color
|
| 562 |
+
drawtext_cmd = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
'ffmpeg', '-y',
|
| 564 |
'-i', input_video,
|
| 565 |
+
'-vf', f"drawtext=text='{text_style.text}':fontfile={font_path}:fontsize={text_style.font_size}:fontcolor={text_style.color}:{position}:box=1:boxcolor={bg_color_name}@{bg_opacity}:boxborderw={text_style.padding}:borderw={text_style.outline_width}:bordercolor={text_style.outline_color}",
|
| 566 |
'-c:a', 'copy',
|
| 567 |
output_video
|
| 568 |
]
|
| 569 |
|
| 570 |
+
print(f"π¬ Running drawtext command with outline={text_style.outline_width}, bordercolor={text_style.outline_color}")
|
| 571 |
+
result = subprocess.run(drawtext_cmd, capture_output=True, text=True)
|
| 572 |
|
| 573 |
if result.returncode == 0:
|
| 574 |
+
print(f"β
Drawtext method succeeded with background and outline")
|
| 575 |
return True
|
| 576 |
else:
|
| 577 |
+
print(f"β Drawtext failed: {result.stderr}")
|
| 578 |
return False
|
| 579 |
|
| 580 |
# =============================================
|
|
|
|
| 983 |
|
| 984 |
current_video = video_path
|
| 985 |
|
| 986 |
+
# Get font path
|
| 987 |
font_path = get_font_path(request.caption_style.font_family)
|
| 988 |
if not font_path:
|
| 989 |
return CaptionResponse(
|
|
|
|
| 992 |
error=f"Caption font not found: {request.caption_style.font_family}"
|
| 993 |
)
|
| 994 |
|
| 995 |
+
# Get margin value
|
| 996 |
+
margin = request.caption_style.margin
|
| 997 |
+
|
| 998 |
+
# Map position to drawtext position with margin
|
| 999 |
+
drawtext_pos = {
|
| 1000 |
+
"bottom-left": f"x={margin}:y=h-th-{margin}",
|
| 1001 |
+
"bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
|
| 1002 |
+
"bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
|
| 1003 |
+
"center": "x=(w-tw)/2:y=(h-th)/2",
|
| 1004 |
+
"left": f"x={margin}:y=(h-th)/2",
|
| 1005 |
+
"right": f"x=w-tw-{margin}:y=(h-th)/2",
|
| 1006 |
+
"top-left": f"x={margin}:y={margin}",
|
| 1007 |
+
"top-center": f"x=(w-tw)/2:y={margin}",
|
| 1008 |
+
"top-right": f"x=w-tw-{margin}:y={margin}"
|
| 1009 |
+
}
|
| 1010 |
+
position = drawtext_pos.get(request.caption_style.position, f"x=(w-tw)/2:y=h-th-{margin}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1011 |
|
| 1012 |
+
# Process each caption with its own drawtext filter
|
| 1013 |
+
current_input = video_path
|
| 1014 |
|
| 1015 |
+
for i, caption in enumerate(request.captions):
|
| 1016 |
+
temp_output = os.path.join(work_dir, f"step_{i:03d}.mp4")
|
| 1017 |
+
|
| 1018 |
+
# Build drawtext filter with background box and outline
|
| 1019 |
+
drawtext_filter = (
|
| 1020 |
+
f"drawtext=text='{caption.text}':"
|
| 1021 |
+
f"fontfile={font_path}:"
|
| 1022 |
+
f"fontsize={request.caption_style.font_size}:"
|
| 1023 |
+
f"fontcolor={request.caption_style.color}:"
|
| 1024 |
+
f"{position}:"
|
| 1025 |
+
f"box=1:"
|
| 1026 |
+
f"boxcolor={request.caption_style.bg_color}:"
|
| 1027 |
+
f"boxborderw={request.caption_style.padding}:"
|
| 1028 |
+
f"borderw={request.caption_style.outline_width}:"
|
| 1029 |
+
f"bordercolor={request.caption_style.outline_color}:"
|
| 1030 |
+
f"enable='between(t,{caption.start_time},{caption.end_time})'"
|
| 1031 |
)
|
| 1032 |
+
|
| 1033 |
+
cmd = [
|
| 1034 |
+
'ffmpeg', '-y',
|
| 1035 |
+
'-i', current_input,
|
| 1036 |
+
'-vf', drawtext_filter,
|
| 1037 |
+
'-c:a', 'copy',
|
| 1038 |
+
temp_output
|
| 1039 |
+
]
|
| 1040 |
+
|
| 1041 |
+
print(f"π¬ Adding caption {i+1}/{len(request.captions)}: '{caption.text}' with outline={request.caption_style.outline_width}")
|
| 1042 |
+
result = subprocess.run(cmd, capture_output=True, text=True)
|
| 1043 |
+
|
| 1044 |
+
if result.returncode != 0:
|
| 1045 |
+
print(f"β FFmpeg error at caption {i+1}: {result.stderr}")
|
| 1046 |
+
return CaptionResponse(
|
| 1047 |
+
status="error",
|
| 1048 |
+
project_id=request.project_id,
|
| 1049 |
+
error=f"Failed to add caption {i+1}: {result.stderr[:200]}"
|
| 1050 |
+
)
|
| 1051 |
+
|
| 1052 |
+
current_input = temp_output
|
| 1053 |
|
| 1054 |
+
final_video = current_input
|
| 1055 |
|
| 1056 |
if request.title_overlay:
|
| 1057 |
titled_path = os.path.join(work_dir, "titled.mp4")
|
| 1058 |
+
if create_text_overlay(final_video, titled_path, request.title_overlay):
|
| 1059 |
+
final_video = titled_path
|
| 1060 |
else:
|
| 1061 |
print("β οΈ Title overlay failed, continuing with captioned video")
|
| 1062 |
|
| 1063 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1064 |
final_filename = f"captioned_{timestamp}.mp4"
|
| 1065 |
+
final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
|
| 1066 |
|
| 1067 |
shutil.rmtree(work_dir, ignore_errors=True)
|
| 1068 |
|
|
|
|
| 1136 |
error="No captions generated from audio"
|
| 1137 |
)
|
| 1138 |
|
| 1139 |
+
# Get font path
|
| 1140 |
font_path = get_font_path(request.caption_style.font_family)
|
| 1141 |
if not font_path:
|
| 1142 |
return TranscriptionResponse(
|
|
|
|
| 1145 |
error=f"Caption font not found: {request.caption_style.font_family}"
|
| 1146 |
)
|
| 1147 |
|
| 1148 |
+
# Get margin value
|
| 1149 |
+
margin = request.caption_style.margin
|
| 1150 |
+
|
| 1151 |
+
# Map position to drawtext position with margin
|
| 1152 |
+
drawtext_pos = {
|
| 1153 |
+
"bottom-left": f"x={margin}:y=h-th-{margin}",
|
| 1154 |
+
"bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
|
| 1155 |
+
"bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
|
| 1156 |
+
"center": "x=(w-tw)/2:y=(h-th)/2",
|
| 1157 |
+
"left": f"x={margin}:y=(h-th)/2",
|
| 1158 |
+
"right": f"x=w-tw-{margin}:y=(h-th)/2",
|
| 1159 |
+
"top-left": f"x={margin}:y={margin}",
|
| 1160 |
+
"top-center": f"x=(w-tw)/2:y={margin}",
|
| 1161 |
+
"top-right": f"x=w-tw-{margin}:y={margin}"
|
| 1162 |
+
}
|
| 1163 |
+
position = drawtext_pos.get(request.caption_style.position, f"x=(w-tw)/2:y=h-th-{margin}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1164 |
|
| 1165 |
+
# Process each caption with its own drawtext filter
|
| 1166 |
+
current_input = video_path
|
| 1167 |
|
| 1168 |
+
for i, caption in enumerate(captions):
|
| 1169 |
+
temp_output = os.path.join(work_dir, f"step_{i:03d}.mp4")
|
| 1170 |
+
|
| 1171 |
+
# Build drawtext filter with background box and outline
|
| 1172 |
+
drawtext_filter = (
|
| 1173 |
+
f"drawtext=text='{caption.text}':"
|
| 1174 |
+
f"fontfile={font_path}:"
|
| 1175 |
+
f"fontsize={request.caption_style.font_size}:"
|
| 1176 |
+
f"fontcolor={request.caption_style.color}:"
|
| 1177 |
+
f"{position}:"
|
| 1178 |
+
f"box=1:"
|
| 1179 |
+
f"boxcolor={request.caption_style.bg_color}:"
|
| 1180 |
+
f"boxborderw={request.caption_style.padding}:"
|
| 1181 |
+
f"borderw={request.caption_style.outline_width}:"
|
| 1182 |
+
f"bordercolor={request.caption_style.outline_color}:"
|
| 1183 |
+
f"enable='between(t,{caption.start_time},{caption.end_time})'"
|
| 1184 |
)
|
| 1185 |
+
|
| 1186 |
+
cmd = [
|
| 1187 |
+
'ffmpeg', '-y',
|
| 1188 |
+
'-i', current_input,
|
| 1189 |
+
'-vf', drawtext_filter,
|
| 1190 |
+
'-c:a', 'copy',
|
| 1191 |
+
temp_output
|
| 1192 |
+
]
|
| 1193 |
+
|
| 1194 |
+
print(f"π¬ Adding caption {i+1}/{len(captions)}: '{caption.text}'")
|
| 1195 |
+
result = subprocess.run(cmd, capture_output=True, text=True)
|
| 1196 |
+
|
| 1197 |
+
if result.returncode != 0:
|
| 1198 |
+
print(f"β FFmpeg error at caption {i+1}: {result.stderr}")
|
| 1199 |
+
return TranscriptionResponse(
|
| 1200 |
+
status="error",
|
| 1201 |
+
project_id=request.project_id,
|
| 1202 |
+
error=f"Failed to add caption {i+1}: {result.stderr[:200]}"
|
| 1203 |
+
)
|
| 1204 |
+
|
| 1205 |
+
current_input = temp_output
|
| 1206 |
|
| 1207 |
+
final_video = current_input
|
| 1208 |
|
| 1209 |
if request.title_overlay:
|
| 1210 |
titled_path = os.path.join(work_dir, "titled.mp4")
|
| 1211 |
+
if create_text_overlay(final_video, titled_path, request.title_overlay):
|
| 1212 |
+
final_video = titled_path
|
| 1213 |
|
| 1214 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1215 |
final_filename = f"transcribed_{timestamp}.mp4"
|
| 1216 |
+
final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
|
| 1217 |
|
| 1218 |
transcript = " ".join([c.text for c in captions])
|
| 1219 |
|
|
|
|
| 1291 |
error=f"Caption font not found: {request.caption_style.font_family}"
|
| 1292 |
)
|
| 1293 |
|
| 1294 |
+
# Get margin value
|
| 1295 |
+
margin = request.caption_style.margin
|
| 1296 |
+
print(f"π Using margin: {margin} pixels")
|
| 1297 |
+
|
| 1298 |
+
# Map position to drawtext position with margin
|
| 1299 |
+
drawtext_pos = {
|
| 1300 |
+
"bottom-left": f"x={margin}:y=h-th-{margin}",
|
| 1301 |
+
"bottom-center": f"x=(w-tw)/2:y=h-th-{margin}",
|
| 1302 |
+
"bottom-right": f"x=w-tw-{margin}:y=h-th-{margin}",
|
| 1303 |
+
"center": "x=(w-tw)/2:y=(h-th)/2",
|
| 1304 |
+
"left": f"x={margin}:y=(h-th)/2",
|
| 1305 |
+
"right": f"x=w-tw-{margin}:y=(h-th)/2",
|
| 1306 |
+
"top-left": f"x={margin}:y={margin}",
|
| 1307 |
+
"top-center": f"x=(w-tw)/2:y={margin}",
|
| 1308 |
+
"top-right": f"x=w-tw-{margin}:y={margin}"
|
| 1309 |
+
}
|
| 1310 |
+
position = drawtext_pos.get(request.caption_style.position, f"x=(w-tw)/2:y=h-th-{margin}")
|
| 1311 |
+
print(f"π Position string: {position}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1312 |
|
| 1313 |
+
# Process each caption with its own drawtext filter
|
| 1314 |
+
current_input = video_path
|
| 1315 |
|
| 1316 |
+
for i, caption in enumerate(captions):
|
| 1317 |
+
temp_output = os.path.join(work_dir, f"step_{i:03d}.mp4")
|
| 1318 |
+
|
| 1319 |
+
# Build drawtext filter with background box and outline
|
| 1320 |
+
drawtext_filter = (
|
| 1321 |
+
f"drawtext=text='{caption.text}':"
|
| 1322 |
+
f"fontfile={font_path}:"
|
| 1323 |
+
f"fontsize={request.caption_style.font_size}:"
|
| 1324 |
+
f"fontcolor={request.caption_style.color}:"
|
| 1325 |
+
f"{position}:"
|
| 1326 |
+
f"box=1:"
|
| 1327 |
+
f"boxcolor={request.caption_style.bg_color}:"
|
| 1328 |
+
f"boxborderw={request.caption_style.padding}:"
|
| 1329 |
+
f"borderw={request.caption_style.outline_width}:"
|
| 1330 |
+
f"bordercolor={request.caption_style.outline_color}:"
|
| 1331 |
+
f"enable='between(t,{caption.start_time},{caption.end_time})'"
|
| 1332 |
)
|
| 1333 |
+
|
| 1334 |
+
cmd = [
|
| 1335 |
+
'ffmpeg', '-y',
|
| 1336 |
+
'-i', current_input,
|
| 1337 |
+
'-vf', drawtext_filter,
|
| 1338 |
+
'-c:a', 'copy',
|
| 1339 |
+
temp_output
|
| 1340 |
+
]
|
| 1341 |
+
|
| 1342 |
+
print(f"π¬ Adding caption {i+1}/{len(captions)}: '{caption.text}' at margin {margin} with outline={request.caption_style.outline_width}")
|
| 1343 |
+
result = subprocess.run(cmd, capture_output=True, text=True)
|
| 1344 |
+
|
| 1345 |
+
if result.returncode != 0:
|
| 1346 |
+
print(f"β FFmpeg error at caption {i+1}: {result.stderr}")
|
| 1347 |
+
return CaptionResponse(
|
| 1348 |
+
status="error",
|
| 1349 |
+
project_id=request.project_id,
|
| 1350 |
+
error=f"Failed to add caption {i+1}: {result.stderr[:200]}"
|
| 1351 |
+
)
|
| 1352 |
+
|
| 1353 |
+
current_input = temp_output
|
| 1354 |
|
| 1355 |
+
final_video = current_input
|
| 1356 |
|
| 1357 |
if request.title_overlay:
|
| 1358 |
titled_path = os.path.join(work_dir, "titled.mp4")
|
| 1359 |
+
if create_text_overlay(final_video, titled_path, request.title_overlay):
|
| 1360 |
+
final_video = titled_path
|
| 1361 |
else:
|
| 1362 |
print("β οΈ Title overlay failed, continuing with captioned video")
|
| 1363 |
|
| 1364 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1365 |
final_filename = f"captioned_from_srt_{timestamp}.mp4"
|
| 1366 |
+
final_url = upload_to_dataset(final_video, request.project_id, final_filename, "videos")
|
| 1367 |
|
| 1368 |
shutil.rmtree(work_dir, ignore_errors=True)
|
| 1369 |
|
|
|
|
| 1395 |
async def root():
|
| 1396 |
return {
|
| 1397 |
"name": "Text Styling API with Auto Caption",
|
| 1398 |
+
"version": "4.0.0",
|
| 1399 |
"features": {
|
| 1400 |
"title_overlay": "β
",
|
| 1401 |
"manual_captions": "β
",
|
|
|
|
| 1403 |
"srt_support": "β
",
|
| 1404 |
"enhanced_colors": f"β
({len(COLOR_MAP)} colors)",
|
| 1405 |
"text_outline": "β
",
|
| 1406 |
+
"background_boxes": "β
",
|
| 1407 |
+
"margin_control": "β
"
|
| 1408 |
},
|
| 1409 |
"endpoints": {
|
| 1410 |
"style": "POST /api/style - Add title overlay",
|