binaryMao committed on
Commit
0456de7
·
verified ·
1 Parent(s): 605a27b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -8
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import gradio as gr
2
- import os
3
  import numpy as np
4
  import torch
5
  import soundfile as sf
@@ -26,7 +25,7 @@ MODELS = {
26
 
27
 
28
  # =============================
29
- # EXTRACTION AUDIO (FIABLE + COMPATIBLE HF & COLAB)
30
  # =============================
31
 
32
  def extract_audio(video_path, wav_path):
@@ -65,7 +64,7 @@ def transcribe(model, device, wav, model_name):
65
  hyp = hyps[0][0] if isinstance(hyps[0], list) else hyps[0]
66
  return [(w.start_offset_ms/1000, w.end_offset_ms/1000, w.word) for w in hyp.words]
67
 
68
- # === Soloba / QuartzNet → Forced Alignment CTC ===
69
  text = model.transcribe([wav])[0].strip()
70
  if not text:
71
  return []
@@ -85,6 +84,7 @@ def transcribe(model, device, wav, model_name):
85
  timings[i+1] * tps if i+1 < len(timings) else total_s,
86
  words[i]) for i in range(len(words))]
87
 
 
88
  grouped, temp = [], []
89
  for w in aligned:
90
  temp.append(w)
@@ -122,13 +122,14 @@ def burn(video, subs):
122
  final = CompositeVideoClip([clip] + layers)
123
  out = "RobotsMali_Subtitled.mp4"
124
  final.write_videofile(out, codec="libx264", audio_codec="aac", fps=clip.fps, verbose=False, logger=None)
 
125
  clip.close()
126
  final.close()
127
  return out
128
 
129
 
130
  # =============================
131
- # PIPELINE
132
  # =============================
133
 
134
  def pipeline(video_file, model_name):
@@ -136,7 +137,15 @@ def pipeline(video_file, model_name):
136
  return "Veuillez importer une vidéo.", None
137
 
138
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
139
- model = nemo_asr.models.ASRModel.from_pretrained(MODELS[model_name]).to(device)
 
 
 
 
 
 
 
 
140
 
141
  wav = "audio.wav"
142
  extract_audio(video_file, wav)
@@ -147,17 +156,17 @@ def pipeline(video_file, model_name):
147
 
148
 
149
  # =============================
150
- # INTERFACE (inchangée)
151
  # =============================
152
 
153
  with gr.Blocks() as demo:
154
- gr.Markdown("# 🎙️ **RobotsMali - Sous-titrage Bambara Automatique**")
155
 
156
  video = gr.Video(label="Vidéo")
157
  model = gr.Dropdown(list(MODELS.keys()), value="Soloni V1", label="Modèle")
158
  btn = gr.Button("⚡ Générer les sous-titres")
159
  status = gr.Markdown()
160
- out = gr.Video(label="Résultat")
161
 
162
  btn.click(pipeline, inputs=[video, model], outputs=[status, out])
163
 
 
1
  import gradio as gr
 
2
  import numpy as np
3
  import torch
4
  import soundfile as sf
 
25
 
26
 
27
  # =============================
28
+ # EXTRACTION AUDIO (SOLIDE & COMPATIBLE HF)
29
  # =============================
30
 
31
  def extract_audio(video_path, wav_path):
 
64
  hyp = hyps[0][0] if isinstance(hyps[0], list) else hyps[0]
65
  return [(w.start_offset_ms/1000, w.end_offset_ms/1000, w.word) for w in hyp.words]
66
 
67
+ # === Soloba & QuartzNet → CTC Forced Alignment ===
68
  text = model.transcribe([wav])[0].strip()
69
  if not text:
70
  return []
 
84
  timings[i+1] * tps if i+1 < len(timings) else total_s,
85
  words[i]) for i in range(len(words))]
86
 
87
+ # Groupage lisible (max 4 mots par sous-titre)
88
  grouped, temp = [], []
89
  for w in aligned:
90
  temp.append(w)
 
122
  final = CompositeVideoClip([clip] + layers)
123
  out = "RobotsMali_Subtitled.mp4"
124
  final.write_videofile(out, codec="libx264", audio_codec="aac", fps=clip.fps, verbose=False, logger=None)
125
+
126
  clip.close()
127
  final.close()
128
  return out
129
 
130
 
131
  # =============================
132
+ # PIPELINE PRINCIPAL
133
  # =============================
134
 
135
  def pipeline(video_file, model_name):
 
137
  return "Veuillez importer une vidéo.", None
138
 
139
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
140
+
141
+ # Chargement correct selon le modèle
142
+ if "Soloni" in model_name:
143
+ model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(model_name=MODELS[model_name])
144
+ else:
145
+ model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(model_name=MODELS[model_name])
146
+
147
+ model = model.to(device)
148
+ model.eval()
149
 
150
  wav = "audio.wav"
151
  extract_audio(video_file, wav)
 
156
 
157
 
158
  # =============================
159
+ # INTERFACE (DESIGN CONSERVÉ)
160
  # =============================
161
 
162
  with gr.Blocks() as demo:
163
+ gr.Markdown("# 🎙️ **RobotsMali Sous-titrage automatique Bambara**")
164
 
165
  video = gr.Video(label="Vidéo")
166
  model = gr.Dropdown(list(MODELS.keys()), value="Soloni V1", label="Modèle")
167
  btn = gr.Button("⚡ Générer les sous-titres")
168
  status = gr.Markdown()
169
+ out = gr.Video(label="Résultat (avec sous-titres)")
170
 
171
  btn.click(pipeline, inputs=[video, model], outputs=[status, out])
172