binaryMao committed on
Commit
d7541cf
·
verified ·
1 Parent(s): 27e6201

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -15
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import os, warnings, logging, tempfile
2
 
 
3
  warnings.filterwarnings("ignore")
4
  logging.getLogger("nemo_logger").setLevel(logging.ERROR)
5
 
 
6
  os.environ["NEMO_FORCE_CPU"] = "1"
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
@@ -15,12 +17,19 @@ import soundfile as sf
15
  from moviepy.editor import VideoFileClip, CompositeVideoClip, TextClip
16
  from nemo.collections import asr as nemo_asr
17
 
 
 
 
 
 
 
 
18
 
19
  # ---------------- CONFIG ---------------- #
20
 
21
  SR = 16000
22
  MAX_VIDEO_BYTES = 200_000_000 # 200MB limite
23
- TITLE = "RobotsMali Caption Studio — Sous-titrage Automatique en Bambara"
24
 
25
  ASR_MODELS = {
26
  "Soloba CTC 0.6B V0": "RobotsMali/soloba-ctc-0.6b-v0",
@@ -54,15 +63,15 @@ def extract_audio(video_path, wav_path):
54
  raise RuntimeError("⚠️ Vidéo trop lourde (>200MB). Compressez puis réessayez.")
55
 
56
  os.system(f"ffmpeg -y -i '{video_path}' -ac 1 -ar {SR} -vn '{wav_path}' >/dev/null 2>&1")
57
-
58
  audio, sr = sf.read(wav_path)
 
59
  if sr == 0 or len(audio) == 0:
60
  raise RuntimeError("⚠️ Audio introuvable ou illisible.")
61
 
62
  return len(audio) / sr
63
 
64
 
65
- # ---------------- TRANSCRIBE (UNIFIÉ + SÛR) ---------------- #
66
 
67
  def transcribe(model, device, wav_path, model_key):
68
  audio, sr = sf.read(wav_path)
@@ -79,7 +88,7 @@ def transcribe(model, device, wav_path, model_key):
79
  x = torch.tensor(audio, dtype=torch.float32).unsqueeze(0).to(device)
80
  ln = torch.tensor([x.shape[1]]).to(device)
81
 
82
- # ---- Priority 1: Soloni precise timestamps ---- #
83
  if "Soloni" in model_key and hasattr(model, "decode_and_align"):
84
  try:
85
  with torch.no_grad():
@@ -91,16 +100,13 @@ def transcribe(model, device, wav_path, model_key):
91
  encoder_output=proc,
92
  encoded_lengths=plen
93
  )
94
-
95
  hyp = hyps[0][0] if isinstance(hyps[0], list) else hyps[0]
96
-
97
  if hasattr(hyp, "words") and hyp.words:
98
  return [(w.start_offset_ms/1000, w.end_offset_ms/1000, w.word) for w in hyp.words]
99
-
100
  except:
101
- pass # fallback auto
102
 
103
- # ---- Priority 2: Universal fallback ---- #
104
  out = model.transcribe([wav_path])[0]
105
 
106
  if hasattr(out, "text"):
@@ -123,6 +129,7 @@ def transcribe(model, device, wav_path, model_key):
123
  t += d
124
  if t >= total_s:
125
  break
 
126
  return subs
127
 
128
 
@@ -141,6 +148,7 @@ def burn(video_path, subs):
141
  txt = TextClip(
142
  w.upper(),
143
  fontsize=int(H/20),
 
144
  color="white",
145
  stroke_color="black",
146
  stroke_width=2,
@@ -169,20 +177,19 @@ def pipeline(video, model_name, progress=gr.Progress()):
169
 
170
  with tempfile.TemporaryDirectory() as td:
171
  wav = f"{td}/audio.wav"
172
-
173
  progress(0.5, "Extraction audio…")
174
- duration = extract_audio(video, wav)
175
 
176
- progress(0.75, "Transcription en Bambara…")
177
  subs = transcribe(model, device, wav, model_name)
178
  if not subs:
179
  return "⚠️ Aucun mot détecté.", None
180
 
181
- progress(0.95, "Incrustation des sous-titres…")
182
  out = burn(video, subs)
183
 
184
  progress(1.0, "✅ Terminé")
185
- return f"✅ Sous-titrage terminé avec **{model_name}**", out
186
 
187
 
188
  # ---------------- UI ---------------- #
@@ -194,7 +201,7 @@ h1 { text-align:center; font-weight:800; color:#005BFF; margin-bottom:6px; }
194
  """
195
 
196
  with gr.Blocks(css=CSS, title=TITLE) as demo:
197
- gr.Markdown("<h1>RobotsMali Caption Studio</h1><p>Génération automatique de sous-titres en Bambara</p>")
198
  video = gr.File(label="🎥 Importer une vidéo (max 200MB)", type="filepath")
199
  model = gr.Dropdown(list(ASR_MODELS.keys()), value="Soloni 114M TDT CTC V1", label="🧠 Modèle ASR")
200
  run = gr.Button("🚀 Générer les sous-titres")
 
1
  import os, warnings, logging, tempfile
2
 
3
+ # --- Suppression warnings inutile ---
4
  warnings.filterwarnings("ignore")
5
  logging.getLogger("nemo_logger").setLevel(logging.ERROR)
6
 
7
+ # --- CPU compatibility for HF Spaces ---
8
  os.environ["NEMO_FORCE_CPU"] = "1"
9
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
10
 
 
17
  from moviepy.editor import VideoFileClip, CompositeVideoClip, TextClip
18
  from nemo.collections import asr as nemo_asr
19
 
20
+ # --- UNLOCK IMAGEMAGICK POLICY (Required on HF) ---
21
+ for p in ("/etc/ImageMagick/policy.xml", "/etc/ImageMagick-6/policy.xml"):
22
+ if os.path.exists(p):
23
+ os.system(
24
+ 'sed -i "s/rights=\\"none\\"/rights=\\"read|write\\"/g" "{}"'.format(p)
25
+ )
26
+
27
 
28
  # ---------------- CONFIG ---------------- #
29
 
30
  SR = 16000
31
  MAX_VIDEO_BYTES = 200_000_000 # 200MB limite
32
+ TITLE = "RobotsMali Caption Studio — Sous-titrage Bambara Automatique"
33
 
34
  ASR_MODELS = {
35
  "Soloba CTC 0.6B V0": "RobotsMali/soloba-ctc-0.6b-v0",
 
63
  raise RuntimeError("⚠️ Vidéo trop lourde (>200MB). Compressez puis réessayez.")
64
 
65
  os.system(f"ffmpeg -y -i '{video_path}' -ac 1 -ar {SR} -vn '{wav_path}' >/dev/null 2>&1")
 
66
  audio, sr = sf.read(wav_path)
67
+
68
  if sr == 0 or len(audio) == 0:
69
  raise RuntimeError("⚠️ Audio introuvable ou illisible.")
70
 
71
  return len(audio) / sr
72
 
73
 
74
+ # ---------------- TRANSCRIBE (UNIFIÉ + SAFE) ---------------- #
75
 
76
  def transcribe(model, device, wav_path, model_key):
77
  audio, sr = sf.read(wav_path)
 
88
  x = torch.tensor(audio, dtype=torch.float32).unsqueeze(0).to(device)
89
  ln = torch.tensor([x.shape[1]]).to(device)
90
 
91
+ # ---- Soloni: timestamps réels ---- #
92
  if "Soloni" in model_key and hasattr(model, "decode_and_align"):
93
  try:
94
  with torch.no_grad():
 
100
  encoder_output=proc,
101
  encoded_lengths=plen
102
  )
 
103
  hyp = hyps[0][0] if isinstance(hyps[0], list) else hyps[0]
 
104
  if hasattr(hyp, "words") and hyp.words:
105
  return [(w.start_offset_ms/1000, w.end_offset_ms/1000, w.word) for w in hyp.words]
 
106
  except:
107
+ pass
108
 
109
+ # ---- Fallback universel (Soloba + QuartzNet + backup Soloni) ---- #
110
  out = model.transcribe([wav_path])[0]
111
 
112
  if hasattr(out, "text"):
 
129
  t += d
130
  if t >= total_s:
131
  break
132
+
133
  return subs
134
 
135
 
 
148
  txt = TextClip(
149
  w.upper(),
150
  fontsize=int(H/20),
151
+ font="DejaVu-Sans", # ✅ Police stable
152
  color="white",
153
  stroke_color="black",
154
  stroke_width=2,
 
177
 
178
  with tempfile.TemporaryDirectory() as td:
179
  wav = f"{td}/audio.wav"
 
180
  progress(0.5, "Extraction audio…")
181
+ extract_audio(video, wav)
182
 
183
+ progress(0.75, "Transcription…")
184
  subs = transcribe(model, device, wav, model_name)
185
  if not subs:
186
  return "⚠️ Aucun mot détecté.", None
187
 
188
+ progress(0.95, "Incrustation…")
189
  out = burn(video, subs)
190
 
191
  progress(1.0, "✅ Terminé")
192
+ return f"✅ Sous-titrage terminé ({model_name})", out
193
 
194
 
195
  # ---------------- UI ---------------- #
 
201
  """
202
 
203
  with gr.Blocks(css=CSS, title=TITLE) as demo:
204
+ gr.Markdown("<h1>RobotsMali Caption Studio</h1><p>Sous-titrage Automatique en Bambara</p>")
205
  video = gr.File(label="🎥 Importer une vidéo (max 200MB)", type="filepath")
206
  model = gr.Dropdown(list(ASR_MODELS.keys()), value="Soloni 114M TDT CTC V1", label="🧠 Modèle ASR")
207
  run = gr.Button("🚀 Générer les sous-titres")