Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from faster_whisper import WhisperModel | |
| import tempfile | |
| import os | |
| import numpy as np | |
| import wave | |
# Whisper "small" checkpoint, run on CPU with int8 compute (free tier safe).
# Created once at import time and shared by every transcription request.
model = WhisperModel("small", device="cpu", compute_type="int8")
def _to_pcm16_mono(data):
    """Convert an audio array to mono, 16-bit signed PCM samples.

    Integer input is treated as PCM at its own bit depth and rescaled to the
    int16 range (int16 input passes through unchanged). Any other dtype is
    treated as floating-point samples in [-1.0, 1.0]; out-of-range values are
    clipped instead of wrapping around. Arrays with more than one dimension
    are assumed to be laid out as (samples, channels) and are averaged down
    to a single channel.

    Args:
        data: Array-like of audio samples, 1-D or (samples, channels).

    Returns:
        A 1-D ``numpy.int16`` array.
    """
    samples = np.asarray(data)
    int16 = np.iinfo(np.int16)
    if np.issubdtype(samples.dtype, np.integer):
        # Multiplying integer PCM by 32767 would overflow; only rescale by
        # the ratio between bit depths (exactly 1.0 for int16).
        gain = int16.max / np.iinfo(samples.dtype).max
    else:
        gain = float(int16.max)

    mono = samples.astype(np.float64)
    if mono.ndim > 1:
        mono = mono.mean(axis=1)

    pcm = np.clip(np.rint(mono * gain), int16.min, int16.max)
    return pcm.astype(np.int16)


def transcribe(audio):
    """Transcribe an audio clip and return segment- and word-level timestamps.

    Args:
        audio: ``None`` or a ``(sample_rate, data)`` tuple, where ``data`` is
            an array of samples (1-D, or 2-D as (samples, channels)).

    Returns:
        ``{"error": "no audio"}`` when ``audio`` is ``None`` or contains no
        samples; otherwise a dict with the detected ``"language"`` and a
        ``"segments"`` list. Each segment holds ``"start"``, ``"end"``
        (rounded to 2 decimals), ``"text"`` and a ``"words"`` list of
        ``{"word", "start", "end"}`` entries.
    """
    if audio is None:
        return {"error": "no audio"}

    sample_rate, data = audio
    pcm = _to_pcm16_mono(data)
    if pcm.size == 0:
        return {"error": "no audio"}

    # Reserve a temp path and close the handle right away so the file can be
    # reopened by name on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        path = tmp.name

    try:
        with wave.open(path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(int(sample_rate))
            # WAV sample data is little-endian regardless of host byte order.
            wf.writeframes(pcm.astype("<i2").tobytes())

        segments, info = model.transcribe(path, word_timestamps=True)

        # `segments` is produced lazily, so it is fully consumed here, before
        # the temp file is removed in the `finally` clause.
        out_segments = [
            {
                "start": round(seg.start, 2),
                "end": round(seg.end, 2),
                "text": seg.text.strip(),
                "words": [
                    {
                        "word": w.word,
                        "start": round(w.start, 2),
                        "end": round(w.end, 2),
                    }
                    for w in (seg.words or [])
                ],
            }
            for seg in segments
        ]
    finally:
        # Always clean up, even if writing or transcription fails.
        os.remove(path)

    return {
        "language": info.language,
        "segments": out_segments,
    }
# Gradio UI: one audio input, delivered to `transcribe` as a
# (sample_rate, numpy array) tuple, with the result rendered as JSON.
audio_input = gr.Audio(type="numpy")
iface = gr.Interface(fn=transcribe, inputs=audio_input, outputs="json")

# Start the web server for the app.
iface.launch()