binaryMao committed on
Commit
173bdc2
·
verified ·
1 Parent(s): d436569

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -48
app.py CHANGED
@@ -5,7 +5,7 @@ from huggingface_hub import snapshot_download
5
  from nemo.collections import asr as nemo_asr
6
  import gradio as gr
7
 
8
- # 1. CONFIGURATION MATÉRIEL ET LISTE DES MODÈLES ROBOTSMALI
9
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
  MODELS = {
@@ -23,24 +23,17 @@ MODELS = {
23
  "Traduction Soloni (ST)": ("RobotsMali/st-soloni-114m-tdt-ctc", "rnnt"),
24
  }
25
 
26
- # --- OPTIMISATION : DETECTION DE LA VIDEO DANS LE DOSSIER EXAMPLES ---
27
  def find_example_video():
28
- # Liste des noms possibles basés sur ta capture d'écran
29
- paths = [
30
- "examples/MARALINKE_FIXED.mp4",
31
- "examples/MARALINKE.mp4",
32
- "MARALINKE.mp4"
33
- ]
34
  for p in paths:
35
- if os.path.exists(p):
36
- return p
37
  return None
38
 
39
  EXAMPLE_PATH = find_example_video()
40
-
41
  _cache = {}
42
 
43
- # 2. GESTION DE LA MÉMOIRE ET CHARGEMENT DU MODÈLE
44
  def clear_memory():
45
  _cache.clear()
46
  gc.collect()
@@ -52,13 +45,9 @@ def get_model(name):
52
  clear_memory()
53
  repo, _ = MODELS[name]
54
 
55
- print(f"📥 Téléchargement de {repo}...")
56
  folder = snapshot_download(repo, local_dir_use_symlinks=False)
57
  nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
58
 
59
- if not nemo_file: raise FileNotFoundError("Fichier .nemo introuvable.")
60
-
61
- # Optimisation RobotsMali : Connecteur flexible pour éviter l'erreur state_dict
62
  from nemo.core.connectors.save_restore_connector import SaveRestoreConnector
63
  model = nemo_asr.models.ASRModel.restore_from(
64
  nemo_file,
@@ -73,48 +62,44 @@ def get_model(name):
73
  _cache[name] = model
74
  return model
75
 
76
- # 3. UTILITAIRES
77
  def format_srt_time(sec):
78
  td = time.gmtime(sec)
79
  ms = int((sec - int(sec)) * 1000)
80
  return f"{time.strftime('%H:%M:%S', td)},{ms:03}"
81
 
82
- # 4. PIPELINE DE TRANSCRIPTION
83
  def pipeline(video_in, model_name):
84
  tmp_dir = tempfile.mkdtemp()
85
  try:
86
- if not video_in: return "❌ Veuillez sélectionner une vidéo.", None
87
 
88
- # A. Extraction Audio
89
- yield " Phase 1/4 : Extraction audio...", None
90
  full_wav = os.path.join(tmp_dir, "full.wav")
91
- subprocess.run(f"ffmpeg -y -i {shlex.quote(video_in)} -vn -ac 1 -ar 16000 {full_wav}", shell=True, check=True)
92
 
93
- # B. Segmentation
94
- yield "⏳ Phase 2/4 : Segmentation (20s)...", None
95
  subprocess.run(f"ffmpeg -i {full_wav} -f segment -segment_time 20 -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True, check=True)
96
  audio_segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
97
 
98
- # C. Transcription IA
99
- yield f" Phase 3/4 : Chargement de {model_name}...", None
100
  model = get_model(model_name)
101
 
 
 
 
102
  all_words_ts = []
103
- for idx, seg_path in enumerate(audio_segments):
104
  base_time = idx * 20
105
- yield f"🎙️ Transcription segment {idx+1}/{len(audio_segments)}...", None
106
- hyp = model.transcribe([seg_path], return_hypotheses=True)[0]
107
  if isinstance(hyp, list): hyp = hyp[0]
108
  text = hyp.text if hasattr(hyp, 'text') else str(hyp)
109
  words = text.split()
110
-
111
- # Répartition temporelle
112
  gap = 20.0 / max(len(words), 1)
113
  for i, w in enumerate(words):
114
  all_words_ts.append({"word": w, "start": base_time + (i * gap), "end": base_time + ((i+1) * gap)})
115
 
116
- # D. Génération SRT et Rendu Vidéo
117
- yield "⏳ Phase 4/4 : Incrustation sous-titres...", None
118
  srt_path = os.path.join(tmp_dir, "final.srt")
119
  with open(srt_path, "w", encoding="utf-8") as f:
120
  for i in range(0, len(all_words_ts), 6):
@@ -122,40 +107,36 @@ def pipeline(video_in, model_name):
122
  f.write(f"{(i//6)+1}\n{format_srt_time(chunk[0]['start'])} --> {format_srt_time(chunk[-1]['end'])}\n")
123
  f.write(" ".join([c['word'] for c in chunk]) + "\n\n")
124
 
125
- out_path = os.path.abspath(f"robotsmali_result_{int(time.time())}.mp4")
 
 
126
  safe_srt = srt_path.replace("\\", "/").replace(":", "\\:")
127
 
128
- # Style : Jaune, Taille 18, Centré en bas
129
- cmd = f"ffmpeg -y -i {shlex.quote(video_in)} -vf \"subtitles='{safe_srt}':force_style='Alignment=2,FontSize=18,PrimaryColour=&H00FFFF'\" -c:v libx264 -preset ultrafast -c:a aac {out_path}"
130
  subprocess.run(cmd, shell=True, check=True)
131
 
132
- yield "✅ Terminé !", out_path
133
 
134
  except Exception as e:
135
  yield f"❌ Erreur : {str(e)}", None
136
  finally:
137
  if os.path.exists(tmp_dir): shutil.rmtree(tmp_dir)
138
 
139
- # 5. INTERFACE GRADIO
140
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
141
- gr.HTML("<div style='text-align:center;'><h1>🤖 RobotsMali Speech Laboratory</h1><p>Testez nos modèles de transcription et traduction</p></div>")
142
 
143
  with gr.Row():
144
  with gr.Column():
145
- v_input = gr.Video(label="Vidéo")
146
  m_input = gr.Dropdown(choices=list(MODELS.keys()), value="Soloba V3 (CTC)", label="Modèle")
147
- run_btn = gr.Button("🚀 GÉNÉRER", variant="primary")
148
 
149
- # --- AFFICHAGE DE L'EXEMPLE SI TROUVÉ ---
150
  if EXAMPLE_PATH:
151
- gr.Examples(
152
- examples=[[EXAMPLE_PATH, "Soloba V3 (CTC)"]],
153
- inputs=[v_input, m_input],
154
- label="Vidéo d'exemple"
155
- )
156
 
157
  with gr.Column():
158
- status = gr.Markdown("### État\nPrêt.")
159
  v_output = gr.Video(label="Résultat")
160
 
161
  run_btn.click(pipeline, [v_input, m_input], [status, v_output])
 
5
  from nemo.collections import asr as nemo_asr
6
  import gradio as gr
7
 
8
+ # 1. CONFIGURATION ET MODÈLES
9
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
  MODELS = {
 
23
  "Traduction Soloni (ST)": ("RobotsMali/st-soloni-114m-tdt-ctc", "rnnt"),
24
  }
25
 
26
+ # Détection de la vidéo d'exemple dans le dossier local
27
  def find_example_video():
28
+ paths = ["examples/MARALINKE_FIXED.mp4", "examples/MARALINKE.mp4", "MARALINKE.mp4"]
 
 
 
 
 
29
  for p in paths:
30
+ if os.path.exists(p): return p
 
31
  return None
32
 
33
  EXAMPLE_PATH = find_example_video()
 
34
  _cache = {}
35
 
36
+ # 2. OPTIMISATION CHARGEMENT ET MÉMOIRE
37
  def clear_memory():
38
  _cache.clear()
39
  gc.collect()
 
45
  clear_memory()
46
  repo, _ = MODELS[name]
47
 
 
48
  folder = snapshot_download(repo, local_dir_use_symlinks=False)
49
  nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
50
 
 
 
 
51
  from nemo.core.connectors.save_restore_connector import SaveRestoreConnector
52
  model = nemo_asr.models.ASRModel.restore_from(
53
  nemo_file,
 
62
  _cache[name] = model
63
  return model
64
 
65
+ # 3. TRANSCRIPTION ET RENDU
66
  def format_srt_time(sec):
67
  td = time.gmtime(sec)
68
  ms = int((sec - int(sec)) * 1000)
69
  return f"{time.strftime('%H:%M:%S', td)},{ms:03}"
70
 
 
71
  def pipeline(video_in, model_name):
72
  tmp_dir = tempfile.mkdtemp()
73
  try:
74
+ if not video_in: return "❌ Vidéo absente", None
75
 
76
+ # FFmpeg optimisé (-threads 0)
77
+ yield "🚀 Phase 1 : Extraction Audio...", None
78
  full_wav = os.path.join(tmp_dir, "full.wav")
79
+ subprocess.run(f"ffmpeg -y -threads 0 -i {shlex.quote(video_in)} -vn -ac 1 -ar 16000 {full_wav}", shell=True, check=True)
80
 
81
+ yield "🚀 Phase 2 : Segmentation...", None
 
82
  subprocess.run(f"ffmpeg -i {full_wav} -f segment -segment_time 20 -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True, check=True)
83
  audio_segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
84
 
85
+ # Inférence par BATCH (beaucoup plus rapide)
86
+ yield f"🎙️ Phase 3 : Transcription Batch avec {model_name}...", None
87
  model = get_model(model_name)
88
 
89
+ #
90
+ batch_hypotheses = model.transcribe(audio_segments, batch_size=4, return_hypotheses=True)
91
+
92
  all_words_ts = []
93
+ for idx, hyp in enumerate(batch_hypotheses):
94
  base_time = idx * 20
 
 
95
  if isinstance(hyp, list): hyp = hyp[0]
96
  text = hyp.text if hasattr(hyp, 'text') else str(hyp)
97
  words = text.split()
 
 
98
  gap = 20.0 / max(len(words), 1)
99
  for i, w in enumerate(words):
100
  all_words_ts.append({"word": w, "start": base_time + (i * gap), "end": base_time + ((i+1) * gap)})
101
 
102
+ # Génération SRT
 
103
  srt_path = os.path.join(tmp_dir, "final.srt")
104
  with open(srt_path, "w", encoding="utf-8") as f:
105
  for i in range(0, len(all_words_ts), 6):
 
107
  f.write(f"{(i//6)+1}\n{format_srt_time(chunk[0]['start'])} --> {format_srt_time(chunk[-1]['end'])}\n")
108
  f.write(" ".join([c['word'] for c in chunk]) + "\n\n")
109
 
110
+ # Rendu Final FFmpeg accéléré
111
+ yield "🚀 Phase 4 : Encodage Vidéo...", None
112
+ out_path = os.path.abspath(f"robotsmali_final_{int(time.time())}.mp4")
113
  safe_srt = srt_path.replace("\\", "/").replace(":", "\\:")
114
 
115
+ cmd = f"ffmpeg -y -threads 0 -i {shlex.quote(video_in)} -vf \"subtitles='{safe_srt}':force_style='Alignment=2,FontSize=18,PrimaryColour=&H00FFFF'\" -c:v libx264 -preset superfast -c:a copy {out_path}"
 
116
  subprocess.run(cmd, shell=True, check=True)
117
 
118
+ yield "✅ Succès !", out_path
119
 
120
  except Exception as e:
121
  yield f"❌ Erreur : {str(e)}", None
122
  finally:
123
  if os.path.exists(tmp_dir): shutil.rmtree(tmp_dir)
124
 
125
+ # 4. INTERFACE GRADIO
126
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
127
+ gr.HTML("<div style='text-align:center;'><h1>🤖 RobotsMali Speech Lab</h1></div>")
128
 
129
  with gr.Row():
130
  with gr.Column():
131
+ v_input = gr.Video(label="Source")
132
  m_input = gr.Dropdown(choices=list(MODELS.keys()), value="Soloba V3 (CTC)", label="Modèle")
133
+ run_btn = gr.Button("🚀 LANCER LA TRANSCRIPTION", variant="primary")
134
 
 
135
  if EXAMPLE_PATH:
136
+ gr.Examples(examples=[[EXAMPLE_PATH, "Soloba V3 (CTC)"]], inputs=[v_input, m_input])
 
 
 
 
137
 
138
  with gr.Column():
139
+ status = gr.Markdown("Prêt.")
140
  v_output = gr.Video(label="Résultat")
141
 
142
  run_btn.click(pipeline, [v_input, m_input], [status, v_output])