binaryMao committed on
Commit
e7976e4
·
verified ·
1 Parent(s): 95a2204

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -72
app.py CHANGED
@@ -1,9 +1,7 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- ROBOTSMALI — Sous-titrage Bambara (V5.5 - Production Ready)
4
- - Vidéo d'exemple : examples/MARALINKE.mp4
5
- - Correction AttributeError: Gradio Div -> Column/HTML
6
- - Correction Codec Webcam : VP8 -> H.264 (Stabilisation forcée)
7
  """
8
  import os
9
  import shlex
@@ -22,27 +20,12 @@ from huggingface_hub import snapshot_download
22
  from nemo.collections import asr as nemo_asr
23
  import gradio as gr
24
 
25
- # ---------------------------- # VÉRIFICATION DIAGNOSTIC # ----------------------------
26
- print("--- DIAGNOSTIC DES FICHIERS ---")
27
- example_path = "examples/MARALINKE.mp4"
28
- if os.path.exists(example_path):
29
- print(f"✅ SUCCÈS : {example_path} est bien présent.")
30
- else:
31
- print(f"❌ ERREUR : {example_path} est introuvable !")
32
- if os.path.exists("examples"):
33
- print(f"Contenu réel du dossier examples/ : {os.listdir('examples')}")
34
- else:
35
- print("Le dossier 'examples' n'existe pas à la racine du projet.")
36
- print("-------------------------------")
37
-
38
- # ---------------------------- # CONFIGURATION # ----------------------------
39
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
40
  random.seed(1234)
41
  np.random.seed(1234)
42
  torch.manual_seed(1234)
43
 
44
- SEGMENT_DURATION = 10.0
45
-
46
  MODELS = {
47
  "Soloni V1 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
48
  "Soloni V0 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
@@ -52,7 +35,10 @@ MODELS = {
52
  "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
53
  }
54
 
55
- VIDEO_EXAMPLES = [[example_path, "Soloba V1 (CTC)"]]
 
 
 
56
 
57
  _cache = {}
58
 
@@ -61,7 +47,7 @@ _cache = {}
def run_cmd(cmd):
    """Run an external command and return its combined output.

    Args:
        cmd: Either a shell command line (``str``, executed through the shell
            exactly as before) or an argument vector (list/tuple), which is
            executed without a shell so no quoting is required.

    Returns:
        The command's stdout and stderr, merged into one text string.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The
            message carries the captured output.
    """
    # Only plain strings need shell parsing; argv sequences bypass the shell,
    # which removes quoting/injection concerns for callers that use them.
    res = subprocess.run(
        cmd,
        shell=isinstance(cmd, str),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    if res.returncode != 0:
        raise RuntimeError(f"Erreur FFmpeg: {res.stdout}")
    return res.stdout
66
 
67
  def ffprobe_duration(path):
@@ -89,10 +75,10 @@ def load_model(name):
89
  return model
90
 
def extract_audio(video_path, out_wav):
    """Re-encode the input video, then extract its audio as 16 kHz mono WAV.

    The video is first transcoded to H.264/AAC in a temporary MP4 (the
    original comment describes this as a fix for WebM/webcam inputs), and the
    audio track of that intermediate file is written to ``out_wav``.

    Args:
        video_path: Path of the source video.
        out_wav: Destination path of the WAV file.

    Raises:
        RuntimeError: Propagated from ``run_cmd`` when an ffmpeg call fails.
    """
    tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
    os.close(tmp_fd)
    try:
        # Forced libx264 re-encode of the input before touching the audio.
        run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -c:v libx264 -preset ultrafast -crf 23 -c:a aac {shlex.quote(stabilized_mp4)}')
        run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}')
    finally:
        # Remove the intermediate file even when ffmpeg fails, so failed
        # requests do not accumulate temporary videos on disk.
        if os.path.exists(stabilized_mp4):
            os.remove(stabilized_mp4)
@@ -100,21 +86,45 @@ def extract_audio(video_path, out_wav):
def clean_audio(wav_path):
    """Load a WAV file, normalize it, and write a cleaned 16 kHz copy.

    Steps: a 2-D array is averaged over axis 1 (presumably the channel axis —
    confirm against ``soundfile``), audio not already at 16 kHz is resampled,
    and the peak is scaled to 0.9 unless the signal is (near-)silent.

    Args:
        wav_path: Path of the WAV file to read.

    Returns:
        A ``(clean_path, audio, 16000)`` tuple: the path of the written file,
        the processed samples, and the sample rate.
    """
    audio, sr = sf.read(wav_path)
    if audio.ndim == 2:
        audio = audio.mean(axis=1)
    if sr != 16000:
        audio = librosa.resample(audio.astype(float), orig_sr=sr, target_sr=16000)
    max_val = np.max(np.abs(audio)) if audio.size > 0 else 0.0
    # Skip normalization for silent input to avoid dividing by ~0.
    if max_val > 1e-6:
        audio = audio / max_val * 0.9
    # Derive the output name from the extension only: str.replace() would
    # rewrite every ".wav" occurring in the path (directories included) and
    # would silently overwrite the input when the path has no ".wav" at all.
    root, _ext = os.path.splitext(wav_path)
    clean_path = f"{root}_clean.wav"
    sf.write(clean_path, audio, 16000)
    return clean_path, audio, 16000
109
 
110
- # ---------------------------- # PIPELINE PRINCIPAL # ----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
def pipeline(video_input, model_name):
    """Transcribe a video and burn the transcript in as subtitles.

    This is a generator: it yields ``(status_message, video_path_or_None)``
    tuples so the UI can display progress; the last tuple carries either the
    subtitled video path or an error/warning message.

    Args:
        video_input: Path of the uploaded/recorded video (falsy when absent).
        model_name: Model key passed to ``load_model``.

    Yields:
        ``(str, str | None)`` progress tuples as described above.
    """
    wav_path = None
    clean_wav = None
    try:
        if not video_input:
            # In a generator, ``return value`` is never delivered to the
            # consumer, so the message must be yielded before stopping.
            yield "❌ Vidéo introuvable. Veuillez réessayer.", None
            return
        video_path = video_input

        yield "⏳ Phase 1/3 : Analyse du fichier et extraction audio...", None
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
            wav_path = tf.name

        # NOTE(review): one line is elided in the reviewed diff between the
        # temp-file creation and clean_audio(); this call is inferred from the
        # fact that clean_audio() reads wav_path — confirm against the file.
        extract_audio(video_path, wav_path)
        clean_wav, audio, sr = clean_audio(wav_path)
        duration = ffprobe_duration(video_path) or (len(audio) / sr)

        yield f"⏳ Phase 2/3 : Transcription IA avec {model_name}...", None
        model = load_model(model_name)
        text_out = model.transcribe([clean_wav])[0]
        text_str = text_out.text if hasattr(text_out, 'text') else str(text_out)

        # NOTE(review): this filter drops every one-character token; verify
        # that this does not remove legitimate one-letter words.
        words = [w for w in text_str.split() if len(w) > 1]
        if not words:
            yield "⚠️ Aucune parole détectée dans la vidéo.", None
            return

        yield "⏳ Phase 3/3 : Incrustation des sous-titres...", None
        # Simple alignment heuristic: spread fixed-size word chunks evenly
        # over the clip duration.
        subs = []
        chunk_size = 7
        total = len(words)
        for i in range(0, total, chunk_size):
            chunk = words[i:i + chunk_size]
            s = (i / total) * duration
            e = (min(i + chunk_size, total) / total) * duration
            subs.append((s, e, "\n".join(textwrap.wrap(" ".join(chunk), 40))))

        res_v = burn(video_path, subs)
        yield "✅ Succès ! Votre vidéo est prête.", res_v
    except Exception as e:
        traceback.print_exc()
        yield f"❌ Erreur : {str(e)}", None
    finally:
        # Best-effort removal of the intermediate WAV files, which were
        # previously left behind after every request.
        for tmp_path in (wav_path, clean_wav):
            try:
                if tmp_path and os.path.exists(tmp_path):
                    os.remove(tmp_path)
            except OSError:
                traceback.print_exc()
148
-
def _srt_timestamp(sec):
    """Format a time in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    # Work in integer milliseconds: truncating the fractional part of a float
    # loses a millisecond on values such as 2.3 (-> 299 instead of 300).
    total_ms = max(0, int(round(sec * 1000)))
    hours, rem = divmod(total_ms, 3600000)
    minutes, rem = divmod(rem, 60000)
    seconds, millis = divmod(rem, 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"


def burn(video_path, subs):
    """Write ``subs`` to a temporary SRT file and burn it into the video.

    Args:
        video_path: Path of the source video.
        subs: Iterable of ``(start_seconds, end_seconds, text)`` entries.

    Returns:
        The path of the subtitled video (``"RobotsMali_Subtitled.mp4"``).

    Raises:
        RuntimeError: Propagated from ``run_cmd`` when ffmpeg fails.
    """
    out_path = "RobotsMali_Subtitled.mp4"
    with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
        srt_name = tf.name
        for idx, (start, end, text) in enumerate(subs, 1):
            tf.write(f"{idx}\n{_srt_timestamp(start)} --> {_srt_timestamp(end)}\n{text}\n\n")

    try:
        vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=24,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
        run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -preset fast -crf 23 -c:a aac {shlex.quote(out_path)}')
    finally:
        # Delete the SRT file even when ffmpeg fails.
        os.remove(srt_name)
    return out_path
163
 
164
- # ---------------------------- # INTERFACE ARTISTIQUE # ----------------------------
165
 
166
  custom_css = """
167
  body { background-color: #0b0e14; }
168
- .gradio-container { background: rgba(17, 25, 40, 0.8) !important; backdrop-filter: blur(12px); border-radius: 20px; border: 1px solid rgba(255, 255, 255, 0.1); box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37); }
169
- #title-block { text-align: center; padding: 20px; }
170
- .gr-button-primary { background: linear-gradient(135deg, #059669, #10b981) !important; border: none !important; font-weight: bold !important; transition: all 0.3s ease !important; }
171
- .gr-button-primary:hover { transform: scale(1.02); box-shadow: 0 0 15px rgba(16, 185, 129, 0.4); }
172
  """
173
 
174
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
175
- with gr.Column(elem_id="title-block"):
176
  gr.HTML("""
177
- <h1 style='color:#facc15; font-size: 2.5rem; margin-bottom:0;'>🤖 ROBOTSMALI</h1>
178
- <p style='color:#94a3b8; font-size: 1.1rem;'>Intelligence Artificielle pour la Langue Bambara</p>
179
  <div style="height: 3px; width: 60px; background: #facc15; margin: 15px auto;"></div>
180
  """)
181
 
182
  with gr.Row():
183
  with gr.Column():
184
- gr.Markdown("### 📥 Source Vidéo")
185
  v_in = gr.Video(label=None, mirror_webcam=False)
186
  m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle IA")
187
  btn = gr.Button("🚀 GÉNÉRER LES SOUS-TITRES", variant="primary")
@@ -191,14 +177,15 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
191
  status = gr.Markdown("*En attente de traitement...*")
192
  v_out = gr.Video(label=None)
193
 
194
- # Section Exemples
195
  gr.Examples(
196
  examples=VIDEO_EXAMPLES,
197
  inputs=[v_in, m_sel],
198
- label="📺 Sélectionner une vidéo d'exemple"
 
199
  )
200
 
201
- gr.HTML("<div style='text-align: center; color: #475569; margin-top: 30px; font-size: 0.9rem;'>© 2025 RobotsMali Bamako, Mali</div>")
202
 
203
  btn.click(pipeline, [v_in, m_sel], [status, v_out])
204
 
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ ROBOTSMALI — Sous-titrage Bambara (V5.6 - Production Final)
4
+ Logiciel de transcription et d'incrustation vidéo pour le Bambara.
 
 
5
  """
6
  import os
7
  import shlex
 
20
  from nemo.collections import asr as nemo_asr
21
  import gradio as gr
22
 
23
+ # ---------------------------- # CONFIGURATION IA # ----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
25
  random.seed(1234)
26
  np.random.seed(1234)
27
  torch.manual_seed(1234)
28
 
 
 
29
  MODELS = {
30
  "Soloni V1 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
31
  "Soloni V0 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
 
35
  "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
36
  }
37
 
38
+ # Chemin vers la vidéo d'exemple
39
+ VIDEO_EXAMPLES = [
40
+ ["examples/MARALINKE.mp4", "Soloba V1 (CTC)"]
41
+ ]
42
 
43
  _cache = {}
44
 
 
def run_cmd(cmd):
    """Run an external command and return its combined output.

    Args:
        cmd: Either a shell command line (``str``, executed through the shell
            exactly as before) or an argument vector (list/tuple), which is
            executed without a shell so no quoting is required.

    Returns:
        The command's stdout and stderr, merged into one text string.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The
            message carries the captured output.
    """
    # Only plain strings need shell parsing; argv sequences bypass the shell,
    # which removes quoting/injection concerns for callers that use them.
    res = subprocess.run(
        cmd,
        shell=isinstance(cmd, str),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    if res.returncode != 0:
        raise RuntimeError(f"Erreur système: {res.stdout}")
    return res.stdout
52
 
53
  def ffprobe_duration(path):
 
75
  return model
76
 
def extract_audio(video_path, out_wav):
    """Prepare the video (H.264) and extract its audio as 16 kHz mono WAV.

    The input is first transcoded to H.264/AAC in a temporary MP4 (the
    original comment says this is done to support all formats, webcam/WebM
    included), and the audio of that intermediate file is written to
    ``out_wav``.

    Args:
        video_path: Path of the source video.
        out_wav: Destination path of the WAV file.

    Raises:
        RuntimeError: Propagated from ``run_cmd`` when an ffmpeg call fails.
    """
    tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
    os.close(tmp_fd)
    try:
        # Re-encode first, then pull the audio track from the re-encoded file.
        run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -c:v libx264 -preset ultrafast -crf 23 -c:a aac {shlex.quote(stabilized_mp4)}')
        run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}')
    finally:
        # Remove the intermediate file even when ffmpeg fails, so failed
        # requests do not accumulate temporary videos on disk.
        if os.path.exists(stabilized_mp4):
            os.remove(stabilized_mp4)
 
def clean_audio(wav_path):
    """Load a WAV file, normalize it, and write a cleaned 16 kHz copy.

    Steps: a 2-D array is averaged over axis 1 (presumably the channel axis —
    confirm against ``soundfile``), audio not already at 16 kHz is resampled,
    and the peak is scaled to 0.9 unless the signal is (near-)silent.

    Args:
        wav_path: Path of the WAV file to read.

    Returns:
        A ``(clean_path, audio, 16000)`` tuple: the path of the written file,
        the processed samples, and the sample rate.
    """
    audio, sr = sf.read(wav_path)
    if audio.ndim == 2:
        audio = audio.mean(axis=1)
    if sr != 16000:
        audio = librosa.resample(audio.astype(float), orig_sr=sr, target_sr=16000)
    max_val = np.max(np.abs(audio)) if audio.size > 0 else 0.0
    # Skip normalization for silent input to avoid dividing by ~0.
    if max_val > 1e-6:
        audio = audio / max_val * 0.9
    # Derive the output name from the extension only: str.replace() would
    # rewrite every ".wav" occurring in the path (directories included) and
    # would silently overwrite the input when the path has no ".wav" at all.
    root, _ext = os.path.splitext(wav_path)
    clean_path = f"{root}_clean.wav"
    sf.write(clean_path, audio, 16000)
    return clean_path, audio, 16000
96
 
97
+ # ---------------------------- # TRANSCRIPTION & BURNING # ----------------------------
98
+
def _srt_timestamp(sec):
    """Format a time in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    # Work in integer milliseconds: truncating the fractional part of a float
    # loses a millisecond on values such as 2.3 (-> 299 instead of 300).
    total_ms = max(0, int(round(sec * 1000)))
    hours, rem = divmod(total_ms, 3600000)
    minutes, rem = divmod(rem, 60000)
    seconds, millis = divmod(rem, 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"


def burn_subtitles(video_path, words, duration):
    """Create an SRT file from ``words`` and burn it into the video.

    Words are grouped in chunks of 7 and each chunk is given a time slot
    proportional to its position in the word list, so the subtitles are
    spread evenly over ``duration`` (no real alignment is performed).

    Args:
        video_path: Path of the source video.
        words: List of transcript words.
        duration: Clip duration in seconds.

    Returns:
        The path of the subtitled video (``"RobotsMali_Subtitled.mp4"``).

    Raises:
        RuntimeError: Propagated from ``run_cmd`` when ffmpeg fails.
    """
    out_path = "RobotsMali_Subtitled.mp4"
    chunk_size = 7
    total = len(words)
    with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
        srt_name = tf.name
        for number, first in enumerate(range(0, total, chunk_size), 1):
            last = min(first + chunk_size, total)
            start = (first / total) * duration
            end = (last / total) * duration
            # Wrap each cue to 40 columns so long chunks span several lines.
            txt = "\n".join(textwrap.wrap(" ".join(words[first:last]), 40))
            tf.write(f"{number}\n{_srt_timestamp(start)} --> {_srt_timestamp(end)}\n{txt}\n\n")

    try:
        vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
        run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -crf 23 -c:a aac {shlex.quote(out_path)}')
    finally:
        # Delete the SRT file even when ffmpeg fails.
        os.remove(srt_name)
    return out_path
121
 
def pipeline(video_input, model_name):
    """Transcribe a video and burn the transcript in as subtitles.

    This is a generator: it yields ``(status_message, video_path_or_None)``
    tuples so the UI can display progress; the last tuple carries either the
    subtitled video path or an error/warning message.

    Args:
        video_input: Path of the uploaded/recorded video (falsy when absent).
        model_name: Model key passed to ``load_model``.

    Yields:
        ``(str, str | None)`` progress tuples as described above.
    """
    wav_path = None
    clean_wav = None
    try:
        if not video_input:
            # In a generator, ``return value`` is never delivered to the
            # consumer, so the message must be yielded before stopping.
            yield "❌ Aucune vidéo", None
            return
        video_path = video_input

        yield "⏳ Phase 1/3 : Analyse du fichier...", None
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
            wav_path = tf.name

        # NOTE(review): one line is elided in the reviewed diff between the
        # temp-file creation and clean_audio(); this call is inferred from the
        # fact that clean_audio() reads wav_path — confirm against the file.
        extract_audio(video_path, wav_path)
        clean_wav, audio, sr = clean_audio(wav_path)
        duration = ffprobe_duration(video_path) or (len(audio) / sr)

        yield f"⏳ Phase 2/3 : Transcription IA ({model_name})...", None
        model = load_model(model_name)
        res = model.transcribe([clean_wav])[0]
        text = res.text if hasattr(res, 'text') else str(res)
        # NOTE(review): this filter drops every one-character token; verify
        # that this does not remove legitimate one-letter words.
        words = [w for w in text.split() if len(w) > 1]

        if not words:
            yield "⚠️ Pas de parole détectée", None
            return

        yield "⏳ Phase 3/3 : Incrustation des sous-titres...", None
        final_video = burn_subtitles(video_path, words, duration)

        yield "✅ Succès !", final_video
    except Exception as e:
        traceback.print_exc()
        yield f"❌ Erreur: {str(e)}", None
    finally:
        # Best-effort removal of the intermediate WAV files, which were
        # previously left behind after every request.
        for tmp_path in (wav_path, clean_wav):
            try:
                if tmp_path and os.path.exists(tmp_path):
                    os.remove(tmp_path)
            except OSError:
                traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
+ # ---------------------------- # INTERFACE GRADIO # ----------------------------
152
 
153
  custom_css = """
154
  body { background-color: #0b0e14; }
155
+ .gradio-container { background: rgba(17, 25, 40, 0.8) !important; backdrop-filter: blur(12px); border-radius: 20px; border: 1px solid rgba(255, 255, 255, 0.1); }
156
+ #title-container { text-align: center; padding: 20px; }
157
+ .gr-button-primary { background: linear-gradient(135deg, #059669, #10b981) !important; border: none !important; }
 
158
  """
159
 
160
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
161
+ with gr.Column(elem_id="title-container"):
162
  gr.HTML("""
163
+ <h1 style='color:#facc15; font-size: 2.5rem; margin:0;'>🤖 ROBOTSMALI</h1>
164
+ <p style='color:#94a3b8; font-style:italic;'>Intelligence Artificielle pour le Bambara</p>
165
  <div style="height: 3px; width: 60px; background: #facc15; margin: 15px auto;"></div>
166
  """)
167
 
168
  with gr.Row():
169
  with gr.Column():
170
+ gr.Markdown("### 📥 Source")
171
  v_in = gr.Video(label=None, mirror_webcam=False)
172
  m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle IA")
173
  btn = gr.Button("🚀 GÉNÉRER LES SOUS-TITRES", variant="primary")
 
177
  status = gr.Markdown("*En attente de traitement...*")
178
  v_out = gr.Video(label=None)
179
 
180
+ # Section des exemples avec cache_examples=False pour débloquer le clic
181
  gr.Examples(
182
  examples=VIDEO_EXAMPLES,
183
  inputs=[v_in, m_sel],
184
+ label="📺 Exemples Disponibles",
185
+ cache_examples=False
186
  )
187
 
188
+ gr.HTML("<div style='text-align: center; color: #475569; padding-top: 20px;'>© 2025 RobotsMali - Bamako</div>")
189
 
190
  btn.click(pipeline, [v_in, m_sel], [status, v_out])
191