Spaces:

Invescoz
/

whisper.cpp

Sleeping

whisper.cpp / app.py

Update app.py

af83e32 verified 12 days ago

1.45 kB

	import gradio as gr
	from faster_whisper import WhisperModel
	import tempfile
	import os
	import numpy as np
	import wave

	# Whisper "small" checkpoint on CPU with int8 compute; the original author
	# chose this configuration as safe for the free hosting tier.
	model = WhisperModel("small", device="cpu", compute_type="int8")

	def _pcm16_mono(data):
	    """Return *data* as a 1-D ``int16`` array of mono PCM samples.

	    Integer input is treated as signed PCM and normalised by its dtype's
	    full scale; floating-point input is taken to already be in [-1.0, 1.0].
	    Multi-dimensional input is averaged over axis 1 to produce one channel.
	    """
	    samples = np.asarray(data)
	    if np.issubdtype(samples.dtype, np.integer):
	        # Normalise first so that int16/int32 input is not multiplied by
	        # 32767 in its own dtype (which silently overflows).
	        full_scale = float(np.iinfo(samples.dtype).max)
	        samples = samples.astype(np.float64) / full_scale
	    else:
	        samples = samples.astype(np.float64)

	    if samples.ndim > 1:
	        # NOTE(review): assumes the (samples, channels) layout Gradio
	        # documents for multi-channel audio.
	        samples = samples.mean(axis=1)

	    # Clip so out-of-range values saturate instead of wrapping around.
	    samples = np.clip(samples, -1.0, 1.0)
	    return np.rint(samples * 32767).astype(np.int16)


	def transcribe(audio):
	    """Transcribe a recording and return segment- and word-level timestamps.

	    Args:
	        audio: ``None``, or a ``(sample_rate, data)`` tuple where ``data`` is
	            a numpy array of samples (integer PCM or float, mono or
	            multi-channel).

	    Returns:
	        ``{"error": "no audio"}`` when ``audio`` is ``None`` or contains no
	        samples; otherwise a dict with the detected ``"language"`` and a
	        ``"segments"`` list. Each segment has ``"start"``, ``"end"``
	        (rounded to 2 decimals), ``"text"`` and a ``"words"`` list of
	        ``{"word", "start", "end"}`` entries.
	    """
	    if audio is None:
	        return {"error": "no audio"}

	    sample_rate, data = audio
	    pcm = _pcm16_mono(data)
	    if pcm.size == 0:
	        # An empty recording has nothing to transcribe.
	        return {"error": "no audio"}

	    # Reserve a temp path and close the handle straight away, so the file
	    # can be reopened by name on every platform.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
	        path = tmp.name

	    try:
	        with wave.open(path, "wb") as wf:
	            wf.setnchannels(1)
	            wf.setsampwidth(2)  # 2 bytes per sample == 16-bit PCM
	            wf.setframerate(int(sample_rate))
	            wf.writeframes(pcm.tobytes())

	        segments, info = model.transcribe(path, word_timestamps=True)

	        # `segments` is consumed here, before the file is removed, because
	        # faster-whisper yields segments lazily.
	        out_segments = [
	            {
	                "start": round(seg.start, 2),
	                "end": round(seg.end, 2),
	                "text": seg.text.strip(),
	                "words": [
	                    {
	                        "word": w.word,
	                        "start": round(w.start, 2),
	                        "end": round(w.end, 2),
	                    }
	                    for w in (seg.words or [])
	                ],
	            }
	            for seg in segments
	        ]
	    finally:
	        # Always delete the temp file, even when transcription fails.
	        os.remove(path)

	    return {
	        "language": info.language,
	        "segments": out_segments,
	    }

	# Gradio UI: audio input handed to `transcribe` in numpy form, JSON output.
	iface = gr.Interface(fn=transcribe, inputs=gr.Audio(type="numpy"), outputs="json")

	# Start serving the interface.
	iface.launch()