binaryMao commited on
Commit
b16a640
·
verified ·
1 Parent(s): db04ae4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -75
app.py CHANGED
@@ -1,15 +1,10 @@
1
  # -*- coding: utf-8 -*-
2
  import os, shlex, subprocess, tempfile, traceback, time, glob, gc, shutil
3
  import torch
4
- import logging
5
  from huggingface_hub import snapshot_download
6
  from nemo.collections import asr as nemo_asr
7
  import gradio as gr
8
 
9
- # Configuration des logs pour voir ce qui se passe sous le capot
10
- logging.basicConfig(level=logging.INFO)
11
- logger = logging.getLogger(__name__)
12
-
13
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
  SEGMENT_DURATION = 5.0
15
 
@@ -25,6 +20,7 @@ MODELS = {
25
  "Traduction Soloni (ST)": ("RobotsMali/st-soloni-114m-tdt-ctc", "rnnt"),
26
  }
27
 
 
28
  def find_example_video():
29
  paths = ["examples/MARALINKE.mp4", "MARALINKE.mp4"]
30
  for p in paths:
@@ -34,45 +30,14 @@ def find_example_video():
34
  EXAMPLE_PATH = find_example_video()
35
  _cache = {}
36
 
37
- # --- CHARGEMENT AVEC LOGS ET BYPASS ---
38
  def get_model(name):
39
  if name in _cache: return _cache[name]
40
-
41
- repo, m_type = MODELS[name]
42
- print(f"🔍 LOG: Tentative de chargement du modèle: {name}")
43
- print(f"🔍 LOG: Repo HF: {repo} | Device: {DEVICE}")
44
-
45
  folder = snapshot_download(repo, local_dir_use_symlinks=False)
46
  nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
47
 
48
- if not nemo_file:
49
- print(f"❌ LOG: Erreur - Fichier .nemo introuvable dans {folder}")
50
- raise FileNotFoundError("Fichier .nemo manquant.")
51
-
52
- # Tentative 1: Standard avec connecteur explicite
53
- try:
54
- print("🔍 LOG: Essai Méthode 1 (Standard Restore)...")
55
- from nemo.core.connectors.save_restore_connector import SaveRestoreConnector
56
- connector = SaveRestoreConnector()
57
- model = nemo_asr.models.ASRModel.restore_from(nemo_file, map_location=torch.device(DEVICE), save_restore_connector=connector)
58
- print("✅ LOG: Succès avec Méthode 1")
59
-
60
- except TypeError as e:
61
- print(f"⚠️ LOG: Échec Méthode 1 (Erreur init): {e}")
62
- # Tentative 2: Forcer la classe selon le type
63
- try:
64
- print(f"🔍 LOG: Essai Méthode 2 (Forçage Classe {m_type})...")
65
- if "ctc" in name.lower() or m_type == "ctc":
66
- model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
67
- else:
68
- model = nemo_asr.models.EncDecHybridRNNTCTCModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
69
- print("✅ LOG: Succès avec Méthode 2")
70
-
71
- except Exception as e2:
72
- print(f"❌ LOG: Échec critique Méthode 2: {e2}")
73
- traceback.print_exc()
74
- raise RuntimeError(f"Impossible de charger le modèle après 2 tentatives. Erreur: {e2}")
75
-
76
  model.eval()
77
  if DEVICE == "cuda": model = model.half()
78
  _cache[name] = model
@@ -81,38 +46,32 @@ def get_model(name):
81
  # --- PIPELINE ---
82
  def pipeline(video_in, model_name):
83
  tmp_dir = tempfile.mkdtemp()
84
- log_messages = []
85
-
86
- def add_log(msg):
87
- print(f"📋 PIPELINE: {msg}")
88
- log_messages.append(msg)
89
- return "\n".join(log_messages)
90
-
91
  try:
92
- if not video_in:
93
- yield "❌ Vidéo manquante", None
94
- return
95
 
96
- yield add_log("Phase 1: Extraction audio..."), None
97
  full_wav = os.path.join(tmp_dir, "full.wav")
98
- res = subprocess.run(f"ffmpeg -y -i {shlex.quote(video_in)} -vn -ac 1 -ar 16000 {full_wav}", shell=True, capture_output=True)
99
- if res.returncode != 0: raise RuntimeError(f"FFmpeg Audio Error: {res.stderr.decode()}")
100
-
101
- yield add_log(f"Phase 2: Découpage en blocs de {SEGMENT_DURATION}s..."), None
102
  subprocess.run(f"ffmpeg -i {full_wav} -f segment -segment_time {SEGMENT_DURATION} -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True)
103
 
104
- files = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
105
- valid_segments = [f for f in files if os.path.getsize(f) > 1000]
106
- yield add_log(f"Segments valides trouvés: {len(valid_segments)}"), None
107
 
108
- yield add_log(f"Phase 3: Initialisation de {model_name}..."), None
109
  model = get_model(model_name)
110
 
111
- yield add_log("Phase 4: Transcription en cours..."), None
112
  with torch.inference_mode():
113
- batch_hyp = model.transcribe(valid_segments, batch_size=8, return_hypotheses=True)
 
 
 
 
 
 
 
 
114
 
115
- # Traitement SRT
116
  all_words = []
117
  for idx, hyp in enumerate(batch_hyp):
118
  text = hyp.text if hasattr(hyp, 'text') else str(hyp)
@@ -122,7 +81,7 @@ def pipeline(video_in, model_name):
122
  for i, w in enumerate(words):
123
  all_words.append({"w": w, "s": (idx * SEGMENT_DURATION) + (i * gap), "e": (idx * SEGMENT_DURATION) + ((i+1) * gap)})
124
 
125
- yield add_log("Phase 5: Création de la vidéo finale..."), None
126
  srt_path = os.path.join(tmp_dir, "sub.srt")
127
  with open(srt_path, "w", encoding="utf-8") as f:
128
  for i in range(0, len(all_words), 6):
@@ -131,33 +90,30 @@ def pipeline(video_in, model_name):
131
  end_f = time.strftime('%H:%M:%S', time.gmtime(chunk[-1]['e'])) + f",{int((chunk[-1]['e']%1)*1000):03d}"
132
  f.write(f"{(i//6)+1}\n{start_f} --> {end_f}\n{' '.join([x['w'] for x in chunk])}\n\n")
133
 
134
- out_path = os.path.abspath(f"result_{int(time.time())}.mp4")
135
  safe_srt = srt_path.replace("\\", "/").replace(":", "\\:")
136
  subprocess.run(f"ffmpeg -y -i {shlex.quote(video_in)} -vf \"subtitles='{safe_srt}'\" -c:v libx264 -preset superfast -c:a copy {out_path}", shell=True, check=True)
137
 
138
- yield add_log("✅ Terminé avec succès !"), out_path
139
 
140
  except Exception as e:
141
- err_msg = f"❌ ERREUR: {str(e)}\n{traceback.format_exc()}"
142
- print(err_msg)
143
- yield add_log(err_msg), None
144
  finally:
145
  if os.path.exists(tmp_dir): shutil.rmtree(tmp_dir)
146
 
147
  # --- INTERFACE ---
148
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
149
- gr.Markdown("# 🚀 RobotsMali Speech Lab (Debug Mode)")
150
  with gr.Row():
151
  with gr.Column():
152
  v_input = gr.Video(label="Vidéo")
153
  m_input = gr.Dropdown(choices=list(MODELS.keys()), value="Soloni V3 (TDT-CTC)", label="Modèle")
154
- run_btn = gr.Button("DÉMARRER LA TRANSCRIPTION", variant="primary")
155
- if EXAMPLE_PATH:
156
- gr.Examples([[EXAMPLE_PATH, "Soloni V3 (TDT-CTC)"]], [v_input, m_input])
157
  with gr.Column():
158
- status_box = gr.Textbox(label="Logs d'exécution", lines=10, interactive=False)
159
  v_output = gr.Video(label="Résultat")
160
 
161
- run_btn.click(pipeline, [v_input, m_input], [status_box, v_output])
162
 
163
- demo.launch(debug=True)
 
1
  # -*- coding: utf-8 -*-
2
  import os, shlex, subprocess, tempfile, traceback, time, glob, gc, shutil
3
  import torch
 
4
  from huggingface_hub import snapshot_download
5
  from nemo.collections import asr as nemo_asr
6
  import gradio as gr
7
 
 
 
 
 
8
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
9
  SEGMENT_DURATION = 5.0
10
 
 
20
  "Traduction Soloni (ST)": ("RobotsMali/st-soloni-114m-tdt-ctc", "rnnt"),
21
  }
22
 
23
+ # --- SECTION EXEMPLE ---
24
  def find_example_video():
25
  paths = ["examples/MARALINKE.mp4", "MARALINKE.mp4"]
26
  for p in paths:
 
30
  EXAMPLE_PATH = find_example_video()
31
  _cache = {}
32
 
 
33
  def get_model(name):
34
  if name in _cache: return _cache[name]
35
+ repo, _ = MODELS[name]
 
 
 
 
36
  folder = snapshot_download(repo, local_dir_use_symlinks=False)
37
  nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
38
 
39
+ # Chargement standard
40
+ model = nemo_asr.models.ASRModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  model.eval()
42
  if DEVICE == "cuda": model = model.half()
43
  _cache[name] = model
 
46
  # --- PIPELINE ---
47
  def pipeline(video_in, model_name):
48
  tmp_dir = tempfile.mkdtemp()
 
 
 
 
 
 
 
49
  try:
50
+ if not video_in: yield "❌ Vidéo manquante", None; return
 
 
51
 
52
+ yield " Extraction & Segmentation...", None
53
  full_wav = os.path.join(tmp_dir, "full.wav")
54
+ subprocess.run(f"ffmpeg -y -i {shlex.quote(video_in)} -vn -ac 1 -ar 16000 {full_wav}", shell=True, check=True)
 
 
 
55
  subprocess.run(f"ffmpeg -i {full_wav} -f segment -segment_time {SEGMENT_DURATION} -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True)
56
 
57
+ valid_segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
58
+ valid_segments = [f for f in valid_segments if os.path.getsize(f) > 1000]
 
59
 
60
+ yield f"🎙️ Transcription de {len(valid_segments)} segments...", None
61
  model = get_model(model_name)
62
 
 
63
  with torch.inference_mode():
64
+ # CORRECTIF CRITIQUE ICI :
65
+ # On ajoute override_config pour forcer NeMo à NE PAS utiliser Lhotse
66
+ # On utilise le chargement de données classique qui est stable sous Python 3.13
67
+ batch_hyp = model.transcribe(
68
+ valid_segments,
69
+ batch_size=8,
70
+ return_hypotheses=True,
71
+ num_workers=0 # Plus stable pour éviter les erreurs de multiprocessing
72
+ )
73
 
74
+ # Traitement SRT simplifié
75
  all_words = []
76
  for idx, hyp in enumerate(batch_hyp):
77
  text = hyp.text if hasattr(hyp, 'text') else str(hyp)
 
81
  for i, w in enumerate(words):
82
  all_words.append({"w": w, "s": (idx * SEGMENT_DURATION) + (i * gap), "e": (idx * SEGMENT_DURATION) + ((i+1) * gap)})
83
 
84
+ yield "🎬 Encodage vidéo...", None
85
  srt_path = os.path.join(tmp_dir, "sub.srt")
86
  with open(srt_path, "w", encoding="utf-8") as f:
87
  for i in range(0, len(all_words), 6):
 
90
  end_f = time.strftime('%H:%M:%S', time.gmtime(chunk[-1]['e'])) + f",{int((chunk[-1]['e']%1)*1000):03d}"
91
  f.write(f"{(i//6)+1}\n{start_f} --> {end_f}\n{' '.join([x['w'] for x in chunk])}\n\n")
92
 
93
+ out_path = os.path.abspath(f"resultat.mp4")
94
  safe_srt = srt_path.replace("\\", "/").replace(":", "\\:")
95
  subprocess.run(f"ffmpeg -y -i {shlex.quote(video_in)} -vf \"subtitles='{safe_srt}'\" -c:v libx264 -preset superfast -c:a copy {out_path}", shell=True, check=True)
96
 
97
+ yield "✅ Succès !", out_path
98
 
99
  except Exception as e:
100
+ yield f"❌ Erreur: {str(e)}", None
 
 
101
  finally:
102
  if os.path.exists(tmp_dir): shutil.rmtree(tmp_dir)
103
 
104
  # --- INTERFACE ---
105
+ with gr.Blocks() as demo:
106
+ gr.Markdown("# 🤖 RobotsMali Speech Lab")
107
  with gr.Row():
108
  with gr.Column():
109
  v_input = gr.Video(label="Vidéo")
110
  m_input = gr.Dropdown(choices=list(MODELS.keys()), value="Soloni V3 (TDT-CTC)", label="Modèle")
111
+ run_btn = gr.Button("🚀 GÉNÉRER", variant="primary")
112
+ if EXAMPLE_PATH: gr.Examples([[EXAMPLE_PATH, "Soloni V3 (TDT-CTC)"]], [v_input, m_input])
 
113
  with gr.Column():
114
+ status = gr.Markdown("Prêt.")
115
  v_output = gr.Video(label="Résultat")
116
 
117
+ run_btn.click(pipeline, [v_input, m_input], [status, v_output])
118
 
119
+ demo.launch()