CleanSong committed on
Commit
20277ed
·
verified ·
1 Parent(s): 6fedc1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -1,12 +1,21 @@
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
  import torch as torch
 
 
 
 
 
 
 
4
 
5
  # Load large model once on startup
6
  model = WhisperModel("large-v3", device="cuda" if torch.cuda.is_available() else "cpu")
7
 
8
  def transcribe(file_path):
9
  segments, _ = model.transcribe(file_path, word_timestamps=True)
10
- return [{"word": s.text.strip(), "start": s.start, "end": s.end} for s in segments]
 
 
11
 
12
  gr.Interface(fn=transcribe, inputs=gr.Audio(type="filepath"), outputs="json").launch()
 
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
  import torch as torch
4
+ import torchaudio
5
+
6
+ wav, sr = torchaudio.load(file_path)
7
+ if sr != 16000:
8
+ wav = torchaudio.functional.resample(wav, sr, 16000)
9
+ wav = wav.mean(dim=0, keepdim=True) # mono
10
+ torchaudio.save(file_path, wav, 16000)
11
 
12
  # Load large model once on startup
13
  model = WhisperModel("large-v3", device="cuda" if torch.cuda.is_available() else "cpu")
14
 
15
  def transcribe(file_path):
16
  segments, _ = model.transcribe(file_path, word_timestamps=True)
17
+ seen = set()
18
+ transcript = [seg for seg in transcript if not (seg["text"] in seen or seen.add(seg["text"]))]
19
+
20
 
21
  gr.Interface(fn=transcribe, inputs=gr.Audio(type="filepath"), outputs="json").launch()