binaryMao committed on
Commit
ba79116
·
verified ·
1 Parent(s): 5839b85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -40
app.py CHANGED
@@ -1,9 +1,4 @@
1
- # -*- coding: utf-8 -*-
2
- # POUR GOOGLE COLAB, EXÉCUTEZ CES CELLULES AVANT DE LANCER LE SCRIPT :
3
- # !apt-get install -y ffmpeg
4
- # !pip install gradio huggingface_hub torch
5
- # !pip install git+https://github.com/NVIDIA/NeMo.git@main#egg=nemo_toolkit[all]
6
- #
7
 
8
  import os, shlex, subprocess, tempfile, traceback, time, glob, gc, shutil
9
  import torch
@@ -63,15 +58,22 @@ def get_model(name):
63
 
64
  if not nemo_file: raise FileNotFoundError("Fichier .nemo introuvable.")
65
 
66
- from nemo.core.connectors.save_restore_connector import SaveRestoreConnector
67
-
68
- # Correctif pour les clés "embedding_model" inattendues
69
- model = nemo_asr.models.ASRModel.restore_from(
70
- nemo_file,
71
- map_location=torch.device(DEVICE),
72
- save_restore_connector=SaveRestoreConnector(),
73
- strict=False
74
- )
 
 
 
 
 
 
 
75
 
76
  model.to(DEVICE).eval()
77
  if DEVICE == "cuda":
@@ -166,33 +168,15 @@ def pipeline(video_in, model_name):
166
  full_wav = os.path.join(tmp_dir, "full.wav")
167
  subprocess.run(f"ffmpeg -y -threads 0 -i {shlex.quote(video_in)} -vn -ac 1 -ar 16000 {full_wav}", shell=True, check=True)
168
 
169
- yield "⏳ Phase 2/4 : Segmentation Intelligente...", None
 
 
 
 
170
 
171
- # Tentative de segmentation intelligente
172
- try:
173
- cut_points = smart_segment_audio(full_wav, target_duration=5.0)
174
- except Exception as e:
175
- print(f"Warning smart segment: {e}")
176
- cut_points = None
177
-
178
  segment_files = []
179
- if cut_points:
180
- # Découpage selon les points calculés
181
- for i in range(len(cut_points)-1):
182
- start = cut_points[i]
183
- duration = cut_points[i+1] - start
184
- out_name = os.path.join(tmp_dir, f"seg_{i:03d}.wav")
185
- subprocess.run(
186
- f"ffmpeg -y -ss {start:.3f} -t {duration:.3f} -i {full_wav} -c copy {out_name}",
187
- shell=True, check=True
188
- )
189
- segment_files.append({"file": out_name, "start_offset": start})
190
- else:
191
- # Fallback méthode brute (moins précis mais robuste)
192
- subprocess.run(f"ffmpeg -i {full_wav} -f segment -segment_time 5 -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True, check=True)
193
- files = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
194
- for i, f in enumerate(files):
195
- segment_files.append({"file": f, "start_offset": i * 5.0})
196
 
197
  yield f"⏳ Phase 3/4 : Chargement de {model_name}...", None
198
  model = get_model(model_name)
 
1
+
 
 
 
 
 
2
 
3
  import os, shlex, subprocess, tempfile, traceback, time, glob, gc, shutil
4
  import torch
 
58
 
59
  if not nemo_file: raise FileNotFoundError("Fichier .nemo introuvable.")
60
 
61
+ # Correctif pour les clés "embedding_model" inattendues et erreur __init__
62
+ try:
63
+ model = nemo_asr.models.ASRModel.restore_from(
64
+ nemo_file,
65
+ map_location=torch.device(DEVICE),
66
+ strict=False,
67
+ override_config_path=None
68
+ )
69
+ except Exception as e:
70
+ print(f"⚠️ Tentative de chargement alternatif : {e}")
71
+ # Fallback sans override_config_path
72
+ model = nemo_asr.models.ASRModel.restore_from(
73
+ nemo_file,
74
+ map_location=torch.device(DEVICE),
75
+ strict=False
76
+ )
77
 
78
  model.to(DEVICE).eval()
79
  if DEVICE == "cuda":
 
168
  full_wav = os.path.join(tmp_dir, "full.wav")
169
  subprocess.run(f"ffmpeg -y -threads 0 -i {shlex.quote(video_in)} -vn -ac 1 -ar 16000 {full_wav}", shell=True, check=True)
170
 
171
+ yield "⏳ Phase 2/4 : Segmentation (5s optimisé Soloni)...", None
172
+
173
+ # Segmentation fixe 5s (optimal pour Soloni V2/V3)
174
+ subprocess.run(f"ffmpeg -i {full_wav} -f segment -segment_time 5 -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True, check=True)
175
+ files = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
176
 
 
 
 
 
 
 
 
177
  segment_files = []
178
+ for i, f in enumerate(files):
179
+ segment_files.append({"file": f, "start_offset": i * 5.0})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  yield f"⏳ Phase 3/4 : Chargement de {model_name}...", None
182
  model = get_model(model_name)