Invescoz committed on
Commit
968aad0
·
verified ·
1 Parent(s): 9994d98

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ import tempfile
4
+ import os
5
+
6
# Load the "small" Whisper checkpoint once at import time so every request
# reuses the same model instance (supports 50+ languages).
model = whisper.load_model("small")
7
+
8
def transcribe(audio):
    """Transcribe a Gradio audio input with Whisper and return timed segments.

    Args:
        audio: ``None`` when no audio was supplied, otherwise a
            ``(sample_rate, samples)`` tuple as delivered by
            ``gr.Audio(type="numpy")``.

    Returns:
        ``{"error": "no audio"}`` when *audio* is ``None``. Otherwise a dict
        with the detected ``"language"`` and a ``"segments"`` list; each
        segment holds ``"start"``/``"end"`` (rounded to 2 decimals), the
        stripped ``"text"``, and per-word timings under ``"words"``.
    """
    if audio is None:
        return {"error": "no audio"}

    # Kept as a function-local import (as in the original) so the module
    # itself loads without soundfile; it is only needed once audio arrives.
    import soundfile as sf

    # audio = (sample_rate, numpy_array)
    sample_rate, data = audio

    # Reserve a unique path, then close the handle before writing: reopening
    # a still-open NamedTemporaryFile by name fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        path = tmp.name

    try:
        sf.write(path, data, sample_rate)
        result = model.transcribe(
            path,
            word_timestamps=True,
            verbose=False,
        )
    finally:
        # Always delete the temp file, even if writing or transcription
        # raised, so failed requests do not leak files on disk.
        os.remove(path)

    segments = [
        {
            "start": round(seg["start"], 2),
            "end": round(seg["end"], 2),
            "text": seg["text"].strip(),
            "words": [
                {
                    "word": w["word"],
                    "start": round(w["start"], 2),
                    "end": round(w["end"], 2),
                }
                # Segments may lack word-level data; treat that as no words.
                for w in seg.get("words", [])
            ],
        }
        for seg in result["segments"]
    ]

    return {
        "language": result["language"],
        "segments": segments,
    }
48
+
49
# Web UI: a single audio input handed to `transcribe` as a NumPy tuple,
# rendered as JSON, with the flagging button disabled.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="numpy"),
    outputs="json",
    allow_flagging="never",
)

# Bind to 0.0.0.0 rather than Gradio's default host.
iface.launch(server_name="0.0.0.0")