Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@
|
|
| 8 |
# - Lyrics mode: Enable word_timestamps for music-like precision.
|
| 9 |
# - Trim: Skip short/silent segments (<0.5s).
|
| 10 |
# 4. Enhancements: Word emphasis (e.g., wrap "wow" in bold/color tags).
|
| 11 |
-
# 5. Translation: Optional to 120+ langs via googletrans.
|
| 12 |
# 6. ASS subtitle creation: Styled with fonts/colors/sizes/positions/animations/emojis.
|
| 13 |
# 7. Burn to video: FFmpeg overlays HD output, no watermark.
|
| 14 |
# 8. UI: Simple, free, viral-ready for Reels/YouTube.
|
|
@@ -21,10 +21,7 @@ from transformers import pipeline
|
|
| 21 |
import torch
|
| 22 |
import ffmpeg
|
| 23 |
from yt_dlp import YoutubeDL
|
| 24 |
-
from googletrans import Translator #
|
| 25 |
-
# Note: For argostranslate, uncomment below and pre-install packs in HF Space Dockerfile if needed.
|
| 26 |
-
# from argostranslate import package, translate
|
| 27 |
-
# package.update_package_index() # Run once
|
| 28 |
|
| 29 |
# Model options (lighter for speed)
|
| 30 |
MODEL_CHOICES = {
|
|
@@ -96,13 +93,13 @@ def transcribe_audio(audio_path, model_name, lyrics_mode, progress=gr.Progress()
|
|
| 96 |
# Generate kwargs for accuracy boost (transcribe task, auto lang for Hinglish)
|
| 97 |
generate_kwargs = {"task": "transcribe", "language": None} # Auto-detect Hindi/English mix
|
| 98 |
if lyrics_mode:
|
| 99 |
-
generate_kwargs["word_timestamps"] = True # Lyrics precision
|
| 100 |
|
| 101 |
progress(0.5, desc="Transcribing...")
|
| 102 |
result = pipe(audio_path, generate_kwargs=generate_kwargs)
|
| 103 |
|
| 104 |
-
# Extract segments, trim silences (short <0.5s)
|
| 105 |
-
segments = result.get('chunks', [])
|
| 106 |
trimmed_segments = [s for s in segments if (s['end'] - s['start']) > 0.5]
|
| 107 |
|
| 108 |
progress(1, desc="Transcription complete!")
|
|
@@ -112,7 +109,10 @@ def translate_text(text, target_lang):
|
|
| 112 |
"""Optional translation (Blink-like: 120+ langs)."""
|
| 113 |
if target_lang == "en": # No translate
|
| 114 |
return text
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
def create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress=gr.Progress()):
|
| 118 |
"""Create ASS subtitles (styled like Blink: fonts/colors/emojis/highlights/animations)."""
|
|
@@ -147,10 +147,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|
| 147 |
end = f"{int(seg['end']*100)}"
|
| 148 |
text = translate_text(seg['text'].strip(), target_lang)
|
| 149 |
|
| 150 |
-
# Word emphasis/highlights (
|
| 151 |
for word in emphasis_words:
|
| 152 |
if word.lower() in text.lower():
|
| 153 |
-
text = text.replace(word, f"{{\\b1\\c
|
| 154 |
|
| 155 |
# Add emoji example (Blink-like: one-click emojis)
|
| 156 |
if "!" in text:
|
|
@@ -213,7 +213,7 @@ def main_process(
|
|
| 213 |
raise gr.Error("No speech detected!")
|
| 214 |
|
| 215 |
# Emphasis words
|
| 216 |
-
emphasis_words = emphasis_words_str.split(',') if emphasis_words_str else []
|
| 217 |
|
| 218 |
# Create styled ASS
|
| 219 |
ass_path = create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress)
|
|
@@ -232,9 +232,9 @@ def main_process(
|
|
| 232 |
f.write(srt_content)
|
| 233 |
srt_path = f.name
|
| 234 |
|
| 235 |
-
# Preview thumbnail (simple FFmpeg extract)
|
| 236 |
thumb_path = video_path.rsplit('.', 1)[0] + '_thumb.jpg'
|
| 237 |
-
ffmpeg.input(video_path, ss=1).output(thumb_path, vframes=1).run(quiet=True)
|
| 238 |
|
| 239 |
return output_video, srt_path, thumb_path
|
| 240 |
|
|
|
|
| 8 |
# - Lyrics mode: Enable word_timestamps for music-like precision.
|
| 9 |
# - Trim: Skip short/silent segments (<0.5s).
|
| 10 |
# 4. Enhancements: Word emphasis (e.g., wrap "wow" in bold/color tags).
|
| 11 |
+
# 5. Translation: Optional to 120+ langs via googletrans (stable version).
|
| 12 |
# 6. ASS subtitle creation: Styled with fonts/colors/sizes/positions/animations/emojis.
|
| 13 |
# 7. Burn to video: FFmpeg overlays HD output, no watermark.
|
| 14 |
# 8. UI: Simple, free, viral-ready for Reels/YouTube.
|
|
|
|
| 21 |
import torch
|
| 22 |
import ffmpeg
|
| 23 |
from yt_dlp import YoutubeDL
|
| 24 |
+
from googletrans import Translator # Stable version now
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# Model options (lighter for speed)
|
| 27 |
MODEL_CHOICES = {
|
|
|
|
| 93 |
# Generate kwargs for accuracy boost (transcribe task, auto lang for Hinglish)
|
| 94 |
generate_kwargs = {"task": "transcribe", "language": None} # Auto-detect Hindi/English mix
|
| 95 |
if lyrics_mode:
|
| 96 |
+
generate_kwargs["word_timestamps"] = True # Lyrics precision (supported in v3)
|
| 97 |
|
| 98 |
progress(0.5, desc="Transcribing...")
|
| 99 |
result = pipe(audio_path, generate_kwargs=generate_kwargs)
|
| 100 |
|
| 101 |
+
# Extract segments, trim silences (short <0.5s) - FIXED: 'segments' not 'chunks'
|
| 102 |
+
segments = result.get('segments', [])
|
| 103 |
trimmed_segments = [s for s in segments if (s['end'] - s['start']) > 0.5]
|
| 104 |
|
| 105 |
progress(1, desc="Transcription complete!")
|
|
|
|
| 109 |
"""Optional translation (Blink-like: 120+ langs)."""
|
| 110 |
if target_lang == "en": # No translate
|
| 111 |
return text
|
| 112 |
+
try:
|
| 113 |
+
return translator.translate(text, dest=target_lang).text
|
| 114 |
+
except Exception:
|
| 115 |
+
return text # Fallback on error
|
| 116 |
|
| 117 |
def create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress=gr.Progress()):
|
| 118 |
"""Create ASS subtitles (styled like Blink: fonts/colors/emojis/highlights/animations)."""
|
|
|
|
| 147 |
end = f"{int(seg['end']*100)}"
|
| 148 |
text = translate_text(seg['text'].strip(), target_lang)
|
| 149 |
|
| 150 |
+
# Word emphasis/highlights (membership check is case-insensitive; the replace below is still case-sensitive)
|
| 151 |
for word in emphasis_words:
|
| 152 |
if word.lower() in text.lower():
|
| 153 |
+
text = text.replace(word, f"{{\\b1\\c&HFF0000&}}{word}{{\\b0}}", 1) # Red bold, limit to 1 replace
|
| 154 |
|
| 155 |
# Add emoji example (Blink-like: one-click emojis)
|
| 156 |
if "!" in text:
|
|
|
|
| 213 |
raise gr.Error("No speech detected!")
|
| 214 |
|
| 215 |
# Emphasis words
|
| 216 |
+
emphasis_words = [w.strip() for w in emphasis_words_str.split(',') if w.strip()] if emphasis_words_str else []
|
| 217 |
|
| 218 |
# Create styled ASS
|
| 219 |
ass_path = create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress)
|
|
|
|
| 232 |
f.write(srt_content)
|
| 233 |
srt_path = f.name
|
| 234 |
|
| 235 |
+
# Preview thumbnail (simple FFmpeg extract, FIXED: overwrite an existing thumbnail)
|
| 236 |
thumb_path = video_path.rsplit('.', 1)[0] + '_thumb.jpg'
|
| 237 |
+
ffmpeg.input(video_path, ss=1).output(thumb_path, vframes=1).run(quiet=True, overwrite_output=True)
|
| 238 |
|
| 239 |
return output_video, srt_path, thumb_path
|
| 240 |
|