Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,49 +1,58 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import
|
| 3 |
import tempfile
|
| 4 |
import os
|
|
|
|
| 5 |
|
| 6 |
-
model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
def transcribe(audio):
|
| 9 |
if audio is None:
|
| 10 |
return {"error": "no audio"}
|
| 11 |
|
| 12 |
-
# audio = (sample_rate, numpy_array)
|
| 13 |
sample_rate, data = audio
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
path,
|
| 22 |
-
word_timestamps=True
|
| 23 |
-
verbose=False
|
| 24 |
)
|
| 25 |
|
| 26 |
os.remove(path)
|
| 27 |
|
| 28 |
-
|
| 29 |
-
for seg in
|
| 30 |
-
|
| 31 |
-
"start": round(seg
|
| 32 |
-
"end": round(seg
|
| 33 |
-
"text": seg
|
| 34 |
"words": [
|
| 35 |
{
|
| 36 |
-
"word": w
|
| 37 |
-
"start": round(w
|
| 38 |
-
"end": round(w
|
| 39 |
}
|
| 40 |
-
for w in seg.
|
| 41 |
]
|
| 42 |
})
|
| 43 |
|
| 44 |
return {
|
| 45 |
-
"language":
|
| 46 |
-
"segments":
|
| 47 |
}
|
| 48 |
|
| 49 |
iface = gr.Interface(
|
|
@@ -53,4 +62,4 @@ iface = gr.Interface(
|
|
| 53 |
allow_flagging="never"
|
| 54 |
)
|
| 55 |
|
| 56 |
-
iface.launch(
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from faster_whisper import WhisperModel
|
| 3 |
import tempfile
|
| 4 |
import os
|
| 5 |
+
import numpy as np
|
| 6 |
|
| 7 |
+
# Load model (small = good balance)
# faster-whisper "small" checkpoint, forced onto CPU with int8
# quantization. NOTE(review): presumably sized for free-tier Spaces
# hardware (no GPU, limited RAM) — confirm before bumping the model.
model = WhisperModel(
    "small",
    device="cpu",
    compute_type="int8"
)
|
| 13 |
|
| 14 |
def transcribe(audio):
    """Transcribe a Gradio audio input with word-level timestamps.

    Parameters:
        audio: Gradio "numpy" audio input — a ``(sample_rate, samples)``
            tuple, or ``None`` when nothing was recorded/uploaded.
            ``samples`` may be int16 (Gradio's default) or float in
            [-1, 1], mono ``(n,)`` or multi-channel ``(n, channels)``.

    Returns:
        dict: ``{"language": <detected language>, "segments": [...]}``
        where each segment has ``start``/``end`` (seconds, rounded to
        2 decimals), ``text``, and a ``words`` list of per-word
        timestamps. On missing input, ``{"error": "no audio"}``.
    """
    if audio is None:
        return {"error": "no audio"}

    # audio = (sample_rate, numpy_array)
    sample_rate, data = audio

    # Downmix multi-channel (n, channels) to mono so the payload matches
    # the 1-channel WAV header written below.
    if getattr(data, "ndim", 1) > 1:
        data = data.mean(axis=1)

    # Gradio may deliver float samples in [-1, 1] or already-scaled
    # int16. Only scale the float case — multiplying int16 data by
    # 32767 would overflow and corrupt the audio.
    if np.issubdtype(data.dtype, np.floating):
        pcm = (np.clip(data, -1.0, 1.0) * 32767).astype(np.int16)
    else:
        pcm = data.astype(np.int16)

    # Save temp wav (delete=False: we need the path after the handle
    # closes; cleanup is guaranteed by the finally below).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        path = f.name

    try:
        import wave
        with wave.open(path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 16-bit PCM
            wf.setframerate(sample_rate)
            wf.writeframes(pcm.tobytes())

        segments, info = model.transcribe(
            path,
            word_timestamps=True
        )
    finally:
        # Always remove the temp file, even if the write or the
        # transcription call raises. (The original also removed it
        # before iterating `segments`, so this ordering is safe.)
        os.remove(path)

    out_segments = []
    for seg in segments:
        out_segments.append({
            "start": round(seg.start, 2),
            "end": round(seg.end, 2),
            "text": seg.text.strip(),
            "words": [
                {
                    "word": w.word,
                    "start": round(w.start, 2),
                    "end": round(w.end, 2)
                }
                # seg.words may be None when no words were aligned
                for w in (seg.words or [])
            ]
        })

    return {
        "language": info.language,
        "segments": out_segments
    }
|
| 57 |
|
| 58 |
iface = gr.Interface(
|
|
|
|
| 62 |
allow_flagging="never"
|
| 63 |
)
|
| 64 |
|
| 65 |
+
# Start the Gradio server (blocks until shutdown).
iface.launch()
|