|
|
from flask import Flask, request, jsonify, send_from_directory |
|
|
from faster_whisper import WhisperModel |
|
|
import tempfile, os, subprocess |
|
|
|
|
|
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)

# Whisper "tiny" model on the CPU. It is created once at import time and the
# same instance is reused by every call to the /transcribe handler.
model = WhisperModel("tiny", device="cpu")
|
|
|
|
|
|
|
|
# Single-page UI served at "/": records up to 20 s of microphone audio in the
# browser and POSTs the raw WebM bytes to /transcribe.
# The markup is kept ASCII-only (emoji are written as HTML / JavaScript
# escapes) so the page cannot be corrupted by a source-encoding mismatch.
# NOTE(review): the original emoji were mis-decoded beyond exact recovery;
# U+1F399 U+FE0F (studio microphone) and U+1F4DD (memo) are best guesses.
HTML_PAGE = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Simple STT with 20s Timeout</title>
  <style>
    body { font-family: sans-serif; text-align: center; margin-top: 80px; }
    button { padding: 10px 20px; font-size: 18px; border-radius: 10px; border: none; background: #4e8cff; color: white; cursor: pointer; }
    #log { margin-top: 30px; font-size: 18px; white-space: pre-line; }
  </style>
</head>
<body>
  <h2>&#x1F399;&#xFE0F; Speech to Text (Whisper Local)</h2>
  <button id="recordBtn">Start Recording</button>
  <div id="log">Press the button to record up to 20s of audio.</div>

  <script>
    const logEl = document.getElementById('log');
    const btn = document.getElementById('recordBtn');

    async function recordAndSend() {
      let stream;
      let recorder;
      try {
        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        recorder = new MediaRecorder(stream);
      } catch (err) {
        // Permission denied, no input device, or unsupported recorder.
        if (stream) stream.getTracks().forEach(track => track.stop());
        logEl.textContent = "Could not start recording: " + err.message;
        return;
      }
      const chunks = [];

      btn.disabled = true;
      btn.textContent = "Recording...";
      logEl.textContent = "Listening for up to 20 seconds...";

      recorder.ondataavailable = e => chunks.push(e.data);
      recorder.onstop = async () => {
        // Release the microphone so the browser stops showing it as in use.
        stream.getTracks().forEach(track => track.stop());
        logEl.textContent = "Processing...";

        try {
          const blob = new Blob(chunks, { type: 'audio/webm' });
          const arrayBuffer = await blob.arrayBuffer();
          const res = await fetch("/transcribe", {
            method: "POST",
            headers: { "Content-Type": "audio/webm" },
            body: arrayBuffer
          });
          const data = await res.json();
          // An empty string is a valid result (silence), not an error.
          if (typeof data.text === "string") {
            logEl.textContent = "\\u{1F4DD} Transcription:\\n" + (data.text || "(no speech detected)");
          } else {
            logEl.textContent = "Error: " + JSON.stringify(data);
          }
        } catch (err) {
          logEl.textContent = "Network error: " + err.message;
        } finally {
          btn.disabled = false;
          btn.textContent = "Start Recording";
        }
      };

      recorder.start();
      // Stop after 20s; stop() throws if the recorder is already inactive.
      setTimeout(() => {
        if (recorder.state !== "inactive") recorder.stop();
      }, 20000);
    }

    btn.onclick = recordAndSend;
  </script>
</body>
</html>
"""
|
|
|
|
|
@app.get("/")
def index():
    """Serve the single-page recording UI held in ``HTML_PAGE``."""
    return HTML_PAGE
|
|
|
|
|
@app.post("/transcribe")
def transcribe_audio():
    """Transcribe the uploaded recording with the module-level Whisper model.

    The raw request body is written to a temporary ``.webm`` file, converted
    by ffmpeg to a 16 kHz mono WAV file, and passed to ``model.transcribe``.
    Both temporary files are removed whether or not the request succeeds.

    Returns:
        JSON ``{"text": ..., "language": ...}`` on success.
        JSON ``{"error": ...}`` with status 400 when the body is empty, or
        with status 500 when conversion or transcription fails.
    """
    audio_bytes = request.data
    if not audio_bytes:
        return jsonify({"error": "No audio data received"}), 400

    tmp_path = None
    wav_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
            tmp_path = tmp.name
            tmp.write(audio_bytes)

        # splitext only swaps the extension; str.replace would also rewrite a
        # ".webm" that happened to appear earlier in the path.
        wav_path = os.path.splitext(tmp_path)[0] + ".wav"

        # check=True turns a failed conversion into an error here instead of
        # a confusing failure later when the WAV file is opened.
        subprocess.run(
            ["ffmpeg", "-y", "-i", tmp_path, "-ar", "16000", "-ac", "1", wav_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )

        segments, info = model.transcribe(wav_path, beam_size=1)
        # Iterate the segments here, while the WAV file still exists.
        text = " ".join(seg.text for seg in segments).strip()

        return jsonify({"text": text, "language": info.language})
    except Exception as e:
        # Top-level boundary for this endpoint: report any failure as JSON.
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup; a file may not exist if an earlier step failed.
        for path in (tmp_path, wav_path):
            if path is None:
                continue
            try:
                os.remove(path)
            except OSError:
                pass
|
|
|
|
|
if __name__ == "__main__":
    # Run Flask's built-in server on port 7860, bound to all interfaces.
    app.run(port=7860, host="0.0.0.0")