binaryMao committed on
Commit
95a2204
·
verified ·
1 Parent(s): 47095d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -38
app.py CHANGED
@@ -1,9 +1,9 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- ROBOTSMALI — Sous-titrage Bambara (V5.3 - Production)
4
  - Vidéo d'exemple : examples/MARALINKE.mp4
5
  - Correction AttributeError: Gradio Div -> Column/HTML
6
- - Correction Codec Webcam : VP8 -> H.264
7
  """
8
  import os
9
  import shlex
@@ -22,12 +22,27 @@ from huggingface_hub import snapshot_download
22
  from nemo.collections import asr as nemo_asr
23
  import gradio as gr
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # ---------------------------- # CONFIGURATION # ----------------------------
26
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
27
  random.seed(1234)
28
  np.random.seed(1234)
29
  torch.manual_seed(1234)
30
 
 
 
31
  MODELS = {
32
  "Soloni V1 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
33
  "Soloni V0 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
@@ -37,10 +52,7 @@ MODELS = {
37
  "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
38
  }
39
 
40
- # Mise à jour avec le nom simplifié
41
- VIDEO_EXAMPLES = [
42
- ["examples/MARALINKE.mp4", "Soloba V1 (CTC)"]
43
- ]
44
 
45
  _cache = {}
46
 
@@ -63,6 +75,7 @@ def load_model(name):
63
  repo, mode = MODELS[name]
64
  folder = snapshot_download(repo, local_dir_use_symlinks=False)
65
  nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
 
66
  if mode == "rnnt":
67
  model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(nemo_file)
68
  elif mode == "ctc_char":
@@ -70,14 +83,16 @@ def load_model(name):
70
  else:
71
  try: model = nemo_asr.models.EncDecCTCModelBPE.restore_from(nemo_file)
72
  except: model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
 
73
  model.to(DEVICE).eval()
74
  _cache[name] = model
75
  return model
76
 
77
  def extract_audio(video_path, out_wav):
 
78
  tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
79
  os.close(tmp_fd)
80
- # Réencodage H.264 pour supporter le flux webcam
81
  run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -c:v libx264 -preset ultrafast -crf 23 -c:a aac {shlex.quote(stabilized_mp4)}')
82
  run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}')
83
  if os.path.exists(stabilized_mp4): os.remove(stabilized_mp4)
@@ -92,14 +107,14 @@ def clean_audio(wav_path):
92
  sf.write(clean_path, audio, 16000)
93
  return clean_path, audio, 16000
94
 
95
- # ---------------------------- # PIPELINE # ----------------------------
96
 
97
  def pipeline(video_input, model_name):
98
  try:
99
- if not video_input: return "❌ Vidéo introuvable", None
100
  video_path = video_input
101
 
102
- yield "⏳ Phase 1 : Extraction audio...", None
103
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
104
  wav_path = tf.name
105
 
@@ -107,15 +122,16 @@ def pipeline(video_input, model_name):
107
  clean_wav, audio, sr = clean_audio(wav_path)
108
  duration = ffprobe_duration(video_path) or (len(audio)/sr)
109
 
110
- yield f"⏳ Phase 2 : Transcription IA ({model_name})...", None
111
  model = load_model(model_name)
112
- text = model.transcribe([clean_wav])[0]
113
- text_str = text.text if hasattr(text, 'text') else str(text)
114
- words = [w for w in text_str.split() if len(w) > 1]
115
 
116
- if not words: return "⚠️ Pas de parole détectée", None
 
117
 
118
- yield "⏳ Phase 3 : Incrustation des sous-titres...", None
 
119
  subs = []
120
  chunk_size = 7
121
  for i in range(0, len(words), chunk_size):
@@ -125,13 +141,13 @@ def pipeline(video_input, model_name):
125
  subs.append((s, e, "\n".join(textwrap.wrap(" ".join(chunk), 40))))
126
 
127
  res_v = burn(video_path, subs)
128
- yield "✅ Succès !", res_v
129
  except Exception as e:
130
  traceback.print_exc()
131
- yield f"❌ Erreur: {str(e)}", None
132
 
133
  def burn(video_path, subs):
134
- out_path = "RobotsMali_Final.mp4"
135
  with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
136
  for idx, (start, end, text) in enumerate(subs, 1):
137
  def t_srt(sec):
@@ -139,8 +155,9 @@ def burn(video_path, subs):
139
  return f"{h:02}:{m:02}:{s:02},{ms:03}"
140
  tf.write(f"{idx}\n{t_srt(start)} --> {t_srt(end)}\n{text}\n\n")
141
  srt_name = tf.name
142
- vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
143
- run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -crf 23 -c:a aac {shlex.quote(out_path)}')
 
144
  os.remove(srt_name)
145
  return out_path
146
 
@@ -148,45 +165,42 @@ def burn(video_path, subs):
148
 
149
  custom_css = """
150
  body { background-color: #0b0e14; }
151
- .gradio-container { background: rgba(17, 25, 40, 0.8) !important; backdrop-filter: blur(12px); border-radius: 20px; border: 1px solid rgba(255, 255, 255, 0.1); }
152
- #title-container { text-align: center; padding: 20px; }
153
- .gr-button-primary { background: linear-gradient(135deg, #059669, #10b981) !important; border: none !important; }
 
154
  """
155
 
156
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
157
- # Remplacement de gr.Div par gr.Column (Fix AttributeError)
158
- with gr.Column(elem_id="title-container"):
159
  gr.HTML("""
160
- <h1 style='color:#facc15; font-size: 2.5rem; margin:0;'>🤖 ROBOTSMALI</h1>
161
- <p style='color:#94a3b8;'>Intelligence Artificielle pour le Bambara</p>
162
  <div style="height: 3px; width: 60px; background: #facc15; margin: 15px auto;"></div>
163
  """)
164
 
165
  with gr.Row():
166
  with gr.Column():
167
- gr.Markdown("### 📥 Entrée")
168
  v_in = gr.Video(label=None, mirror_webcam=False)
169
- m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle")
170
- btn = gr.Button("🚀 GÉNÉRER", variant="primary")
171
 
172
  with gr.Column():
173
  gr.Markdown("### 📤 Résultat")
174
- status = gr.Markdown("*Prêt...*")
175
  v_out = gr.Video(label=None)
176
 
177
- # Section des exemples (Utilise maintenant MARALINKE.mp4)
178
  gr.Examples(
179
  examples=VIDEO_EXAMPLES,
180
  inputs=[v_in, m_sel],
181
- label="📺 Exemples de Clips"
182
  )
183
 
184
- gr.HTML("<div style='text-align: center; color: #475569; margin-top: 30px;'>© 2025 RobotsMali - Bamako</div>")
185
 
186
  btn.click(pipeline, [v_in, m_sel], [status, v_out])
187
 
188
  if __name__ == "__main__":
189
- # Petit check de debug pour le dossier examples
190
- if not os.path.exists("examples/MARALINKE.mp4"):
191
- print("⚠️ ATTENTION : examples/MARALINKE.mp4 est introuvable sur le serveur.")
192
  demo.launch(share=True, debug=True)
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ ROBOTSMALI — Sous-titrage Bambara (V5.5 - Production Ready)
4
  - Vidéo d'exemple : examples/MARALINKE.mp4
5
  - Correction AttributeError: Gradio Div -> Column/HTML
6
+ - Correction Codec Webcam : VP8 -> H.264 (Stabilisation forcée)
7
  """
8
  import os
9
  import shlex
 
22
  from nemo.collections import asr as nemo_asr
23
  import gradio as gr
24
 
25
+ # ---------------------------- # VÉRIFICATION DIAGNOSTIC # ----------------------------
26
+ print("--- DIAGNOSTIC DES FICHIERS ---")
27
+ example_path = "examples/MARALINKE.mp4"
28
+ if os.path.exists(example_path):
29
+ print(f"✅ SUCCÈS : {example_path} est bien présent.")
30
+ else:
31
+ print(f"❌ ERREUR : {example_path} est introuvable !")
32
+ if os.path.exists("examples"):
33
+ print(f"Contenu réel du dossier examples/ : {os.listdir('examples')}")
34
+ else:
35
+ print("Le dossier 'examples' n'existe pas à la racine du projet.")
36
+ print("-------------------------------")
37
+
38
  # ---------------------------- # CONFIGURATION # ----------------------------
39
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
40
  random.seed(1234)
41
  np.random.seed(1234)
42
  torch.manual_seed(1234)
43
 
44
+ SEGMENT_DURATION = 10.0
45
+
46
  MODELS = {
47
  "Soloni V1 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
48
  "Soloni V0 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
 
52
  "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
53
  }
54
 
55
+ VIDEO_EXAMPLES = [[example_path, "Soloba V1 (CTC)"]]
 
 
 
56
 
57
  _cache = {}
58
 
 
75
  repo, mode = MODELS[name]
76
  folder = snapshot_download(repo, local_dir_use_symlinks=False)
77
  nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
78
+
79
  if mode == "rnnt":
80
  model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(nemo_file)
81
  elif mode == "ctc_char":
 
83
  else:
84
  try: model = nemo_asr.models.EncDecCTCModelBPE.restore_from(nemo_file)
85
  except: model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
86
+
87
  model.to(DEVICE).eval()
88
  _cache[name] = model
89
  return model
90
 
91
  def extract_audio(video_path, out_wav):
92
+ """Stabilisation pour flux webcam et extraction audio."""
93
  tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
94
  os.close(tmp_fd)
95
+ # Correction WebM/Webcam : réencodage libx264 forcé
96
  run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -c:v libx264 -preset ultrafast -crf 23 -c:a aac {shlex.quote(stabilized_mp4)}')
97
  run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}')
98
  if os.path.exists(stabilized_mp4): os.remove(stabilized_mp4)
 
107
  sf.write(clean_path, audio, 16000)
108
  return clean_path, audio, 16000
109
 
110
+ # ---------------------------- # PIPELINE PRINCIPAL # ----------------------------
111
 
112
  def pipeline(video_input, model_name):
113
  try:
114
+ if not video_input: return "❌ Vidéo introuvable. Veuillez réessayer.", None
115
  video_path = video_input
116
 
117
+ yield "⏳ Phase 1/3 : Analyse du fichier et extraction audio...", None
118
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
119
  wav_path = tf.name
120
 
 
122
  clean_wav, audio, sr = clean_audio(wav_path)
123
  duration = ffprobe_duration(video_path) or (len(audio)/sr)
124
 
125
+ yield f"⏳ Phase 2/3 : Transcription IA avec {model_name}...", None
126
  model = load_model(model_name)
127
+ text_out = model.transcribe([clean_wav])[0]
128
+ text_str = text_out.text if hasattr(text_out, 'text') else str(text_out)
 
129
 
130
+ words = [w for w in text_str.split() if len(w) > 1]
131
+ if not words: return "⚠️ Aucune parole détectée dans la vidéo.", None
132
 
133
+ yield "⏳ Phase 3/3 : Incrustation des sous-titres...", None
134
+ # Heuristique d'alignement simple
135
  subs = []
136
  chunk_size = 7
137
  for i in range(0, len(words), chunk_size):
 
141
  subs.append((s, e, "\n".join(textwrap.wrap(" ".join(chunk), 40))))
142
 
143
  res_v = burn(video_path, subs)
144
+ yield "✅ Succès ! Votre vidéo est prête.", res_v
145
  except Exception as e:
146
  traceback.print_exc()
147
+ yield f"❌ Erreur : {str(e)}", None
148
 
149
  def burn(video_path, subs):
150
+ out_path = "RobotsMali_Subtitled.mp4"
151
  with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
152
  for idx, (start, end, text) in enumerate(subs, 1):
153
  def t_srt(sec):
 
155
  return f"{h:02}:{m:02}:{s:02},{ms:03}"
156
  tf.write(f"{idx}\n{t_srt(start)} --> {t_srt(end)}\n{text}\n\n")
157
  srt_name = tf.name
158
+
159
+ vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=24,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
160
+ run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -preset fast -crf 23 -c:a aac {shlex.quote(out_path)}')
161
  os.remove(srt_name)
162
  return out_path
163
 
 
165
 
166
  custom_css = """
167
  body { background-color: #0b0e14; }
168
+ .gradio-container { background: rgba(17, 25, 40, 0.8) !important; backdrop-filter: blur(12px); border-radius: 20px; border: 1px solid rgba(255, 255, 255, 0.1); box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37); }
169
+ #title-block { text-align: center; padding: 20px; }
170
+ .gr-button-primary { background: linear-gradient(135deg, #059669, #10b981) !important; border: none !important; font-weight: bold !important; transition: all 0.3s ease !important; }
171
+ .gr-button-primary:hover { transform: scale(1.02); box-shadow: 0 0 15px rgba(16, 185, 129, 0.4); }
172
  """
173
 
174
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
175
+ with gr.Column(elem_id="title-block"):
 
176
  gr.HTML("""
177
+ <h1 style='color:#facc15; font-size: 2.5rem; margin-bottom:0;'>🤖 ROBOTSMALI</h1>
178
+ <p style='color:#94a3b8; font-size: 1.1rem;'>Intelligence Artificielle pour la Langue Bambara</p>
179
  <div style="height: 3px; width: 60px; background: #facc15; margin: 15px auto;"></div>
180
  """)
181
 
182
  with gr.Row():
183
  with gr.Column():
184
+ gr.Markdown("### 📥 Source Vidéo")
185
  v_in = gr.Video(label=None, mirror_webcam=False)
186
+ m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle IA")
187
+ btn = gr.Button("🚀 GÉNÉRER LES SOUS-TITRES", variant="primary")
188
 
189
  with gr.Column():
190
  gr.Markdown("### 📤 Résultat")
191
+ status = gr.Markdown("*En attente de traitement...*")
192
  v_out = gr.Video(label=None)
193
 
194
+ # Section Exemples
195
  gr.Examples(
196
  examples=VIDEO_EXAMPLES,
197
  inputs=[v_in, m_sel],
198
+ label="📺 Sélectionner une vidéo d'exemple"
199
  )
200
 
201
+ gr.HTML("<div style='text-align: center; color: #475569; margin-top: 30px; font-size: 0.9rem;'>© 2025 RobotsMali Bamako, Mali</div>")
202
 
203
  btn.click(pipeline, [v_in, m_sel], [status, v_out])
204
 
205
  if __name__ == "__main__":
 
 
 
206
  demo.launch(share=True, debug=True)