Realmeas committed on
Commit
90e9f32
·
verified ·
1 Parent(s): 47d8439

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -14
app.py CHANGED
@@ -8,7 +8,7 @@
8
  # - Lyrics mode: Enable word_timestamps for music-like precision.
9
  # - Trim: Skip short/silent segments (<0.5s).
10
  # 4. Enhancements: Word emphasis (e.g., wrap "wow" in bold/color tags).
11
- # 5. Translation: Optional to 120+ langs via argostranslate (pre-install common packs).
12
  # 6. ASS subtitle creation: Styled with fonts/colors/sizes/positions/animations/emojis.
13
  # 7. Burn to video: FFmpeg overlays HD output, no watermark.
14
  # 8. UI: Simple, free, viral-ready for Reels/YouTube.
@@ -21,10 +21,7 @@ from transformers import pipeline
21
  import torch
22
  import ffmpeg
23
  from yt_dlp import YoutubeDL
24
- from googletrans import Translator # Fallback to googletrans for simplicity (argos heavy for 120+ langs)
25
- # Note: For argostranslate, uncomment below and pre-install packs in HF Space Dockerfile if needed.
26
- # from argostranslate import package, translate
27
- # package.update_package_index() # Run once
28
 
29
  # Model options (lighter for speed)
30
  MODEL_CHOICES = {
@@ -96,13 +93,13 @@ def transcribe_audio(audio_path, model_name, lyrics_mode, progress=gr.Progress()
96
  # Generate kwargs for accuracy boost (transcribe task, auto lang for Hinglish)
97
  generate_kwargs = {"task": "transcribe", "language": None} # Auto-detect Hindi/English mix
98
  if lyrics_mode:
99
- generate_kwargs["word_timestamps"] = True # Lyrics precision
100
 
101
  progress(0.5, desc="Transcribing...")
102
  result = pipe(audio_path, generate_kwargs=generate_kwargs)
103
 
104
- # Extract segments, trim silences (short <0.5s)
105
- segments = result.get('chunks', [])
106
  trimmed_segments = [s for s in segments if (s['end'] - s['start']) > 0.5]
107
 
108
  progress(1, desc="Transcription complete!")
@@ -112,7 +109,10 @@ def translate_text(text, target_lang):
112
  """Optional translation (Blink-like: 120+ langs)."""
113
  if target_lang == "en": # No translate
114
  return text
115
- return translator.translate(text, dest=target_lang).text
 
 
 
116
 
117
  def create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress=gr.Progress()):
118
  """Create ASS subtitles (styled like Blink: fonts/colors/emojis/highlights/animations)."""
@@ -147,10 +147,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
147
  end = f"{int(seg['end']*100)}"
148
  text = translate_text(seg['text'].strip(), target_lang)
149
 
150
- # Word emphasis/highlights (e.g., make "wow" pop with bold/color)
151
  for word in emphasis_words:
152
  if word.lower() in text.lower():
153
- text = text.replace(word, f"{{\\b1\\c{&HFF0000&}}}" + word + "{\\b0}")
154
 
155
  # Add emoji example (Blink-like: one-click emojis)
156
  if "!" in text:
@@ -213,7 +213,7 @@ def main_process(
213
  raise gr.Error("No speech detected!")
214
 
215
  # Emphasis words
216
- emphasis_words = emphasis_words_str.split(',') if emphasis_words_str else []
217
 
218
  # Create styled ASS
219
  ass_path = create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress)
@@ -232,9 +232,9 @@ def main_process(
232
  f.write(srt_content)
233
  srt_path = f.name
234
 
235
- # Preview thumbnail (simple FFmpeg extract)
236
  thumb_path = video_path.rsplit('.', 1)[0] + '_thumb.jpg'
237
- ffmpeg.input(video_path, ss=1).output(thumb_path, vframes=1).run(quiet=True)
238
 
239
  return output_video, srt_path, thumb_path
240
 
 
8
  # - Lyrics mode: Enable word_timestamps for music-like precision.
9
  # - Trim: Skip short/silent segments (<0.5s).
10
  # 4. Enhancements: Word emphasis (e.g., wrap "wow" in bold/color tags).
11
+ # 5. Translation: Optional to 120+ langs via googletrans (stable version).
12
  # 6. ASS subtitle creation: Styled with fonts/colors/sizes/positions/animations/emojis.
13
  # 7. Burn to video: FFmpeg overlays HD output, no watermark.
14
  # 8. UI: Simple, free, viral-ready for Reels/YouTube.
 
21
  import torch
22
  import ffmpeg
23
  from yt_dlp import YoutubeDL
24
+ from googletrans import Translator # Stable version now
 
 
 
25
 
26
  # Model options (lighter for speed)
27
  MODEL_CHOICES = {
 
93
  # Generate kwargs for accuracy boost (transcribe task, auto lang for Hinglish)
94
  generate_kwargs = {"task": "transcribe", "language": None} # Auto-detect Hindi/English mix
95
  if lyrics_mode:
96
+ generate_kwargs["word_timestamps"] = True # Lyrics precision (supported in v3)
97
 
98
  progress(0.5, desc="Transcribing...")
99
  result = pipe(audio_path, generate_kwargs=generate_kwargs)
100
 
101
+ # Extract segments, trim silences (short <0.5s) - FIXED: 'segments' not 'chunks'
102
+ segments = result.get('segments', [])
103
  trimmed_segments = [s for s in segments if (s['end'] - s['start']) > 0.5]
104
 
105
  progress(1, desc="Transcription complete!")
 
109
  """Optional translation (Blink-like: 120+ langs)."""
110
  if target_lang == "en": # No translate
111
  return text
112
+ try:
113
+ return translator.translate(text, dest=target_lang).text
114
+ except Exception:
115
+ return text # Fallback on error
116
 
117
  def create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress=gr.Progress()):
118
  """Create ASS subtitles (styled like Blink: fonts/colors/emojis/highlights/animations)."""
 
147
  end = f"{int(seg['end']*100)}"
148
  text = translate_text(seg['text'].strip(), target_lang)
149
 
150
+ # Word emphasis/highlights (case-insensitive check, FIXED)
151
  for word in emphasis_words:
152
  if word.lower() in text.lower():
153
+ text = text.replace(word, f"{{\\b1\\c&HFF0000&}}{word}{{\\b0}}", 1) # Red bold, limit to 1 replace
154
 
155
  # Add emoji example (Blink-like: one-click emojis)
156
  if "!" in text:
 
213
  raise gr.Error("No speech detected!")
214
 
215
  # Emphasis words
216
+ emphasis_words = [w.strip() for w in emphasis_words_str.split(',') if w.strip()] if emphasis_words_str else []
217
 
218
  # Create styled ASS
219
  ass_path = create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress)
 
232
  f.write(srt_content)
233
  srt_path = f.name
234
 
235
+ # Preview thumbnail (simple FFmpeg extract, FIXED: use run)
236
  thumb_path = video_path.rsplit('.', 1)[0] + '_thumb.jpg'
237
+ ffmpeg.input(video_path, ss=1).output(thumb_path, vframes=1).run(quiet=True, overwrite_output=True)
238
 
239
  return output_video, srt_path, thumb_path
240