Invescoz committed on
Commit
86d5d76
·
verified ·
1 Parent(s): df2f652

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -23
app.py CHANGED
@@ -1,49 +1,58 @@
1
  import gradio as gr
2
- import whisper
3
  import tempfile
4
  import os
 
5
 
6
- model = whisper.load_model("small") # supports 50+ languages
 
 
 
 
 
7
 
8
  def transcribe(audio):
9
  if audio is None:
10
  return {"error": "no audio"}
11
 
12
- # audio = (sample_rate, numpy_array)
13
  sample_rate, data = audio
14
 
15
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
16
- import soundfile as sf
17
- sf.write(tmp.name, data, sample_rate)
18
- path = tmp.name
 
 
 
 
 
19
 
20
- result = model.transcribe(
21
  path,
22
- word_timestamps=True,
23
- verbose=False
24
  )
25
 
26
  os.remove(path)
27
 
28
- segments = []
29
- for seg in result["segments"]:
30
- segments.append({
31
- "start": round(seg["start"], 2),
32
- "end": round(seg["end"], 2),
33
- "text": seg["text"].strip(),
34
  "words": [
35
  {
36
- "word": w["word"],
37
- "start": round(w["start"], 2),
38
- "end": round(w["end"], 2)
39
  }
40
- for w in seg.get("words", [])
41
  ]
42
  })
43
 
44
  return {
45
- "language": result["language"],
46
- "segments": segments
47
  }
48
 
49
  iface = gr.Interface(
@@ -53,4 +62,4 @@ iface = gr.Interface(
53
  allow_flagging="never"
54
  )
55
 
56
- iface.launch(server_name="0.0.0.0")
 
1
  import gradio as gr
2
+ from faster_whisper import WhisperModel
3
  import tempfile
4
  import os
5
+ import numpy as np
6
 
7
+ # Load model (small = good balance)
8
+ model = WhisperModel(
9
+ "small",
10
+ device="cpu",
11
+ compute_type="int8"
12
+ )
13
 
14
  def transcribe(audio):
15
  if audio is None:
16
  return {"error": "no audio"}
17
 
 
18
  sample_rate, data = audio
19
 
20
+ # Save temp wav
21
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
22
+ import wave
23
+ with wave.open(f.name, "wb") as wf:
24
+ wf.setnchannels(1)
25
+ wf.setsampwidth(2)
26
+ wf.setframerate(sample_rate)
27
+ wf.writeframes((data * 32767).astype(np.int16).tobytes())
28
+ path = f.name
29
 
30
+ segments, info = model.transcribe(
31
  path,
32
+ word_timestamps=True
 
33
  )
34
 
35
  os.remove(path)
36
 
37
+ out_segments = []
38
+ for seg in segments:
39
+ out_segments.append({
40
+ "start": round(seg.start, 2),
41
+ "end": round(seg.end, 2),
42
+ "text": seg.text.strip(),
43
  "words": [
44
  {
45
+ "word": w.word,
46
+ "start": round(w.start, 2),
47
+ "end": round(w.end, 2)
48
  }
49
+ for w in (seg.words or [])
50
  ]
51
  })
52
 
53
  return {
54
+ "language": info.language,
55
+ "segments": out_segments
56
  }
57
 
58
  iface = gr.Interface(
 
62
  allow_flagging="never"
63
  )
64
 
65
+ iface.launch()