File size: 3,067 Bytes
3bf4d41 07221e5 0998987 e285d1f 3bf4d41 262b239 3bf4d41 0998987 262b239 3bf4d41 262b239 3bf4d41 262b239 3bf4d41 262b239 3bf4d41 262b239 0998987 3bf4d41 0998987 3bf4d41 0998987 3bf4d41 d204a95 3bf4d41 431bd15 3bf4d41 431bd15 3bf4d41 0998987 3bf4d41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
from flask import Flask, request, jsonify, send_from_directory
from faster_whisper import WhisperModel
import tempfile, os, subprocess
# Flask application object; the route decorators further down register
# their handlers on it.
app = Flask(__name__)
# Load the "tiny" Whisper model on the CPU once at import time so every
# request reuses the same instance instead of reloading it.
model = WhisperModel("tiny", device="cpu")
# --- HTML served directly ---
# Single-page recorder UI. The embedded script records up to 20 seconds of
# microphone audio with MediaRecorder, POSTs the raw WebM bytes to
# /transcribe and writes the JSON result into the #log element.
# Note: "\\n" below is deliberate -- it must reach the browser as the
# two-character JavaScript escape, not as a real line break.
HTML_PAGE = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Simple STT with 20s Timeout</title>
  <style>
    body { font-family: sans-serif; text-align: center; margin-top: 80px; }
    button { padding: 10px 20px; font-size: 18px; border-radius: 10px; border: none; background: #4e8cff; color: white; cursor: pointer; }
    #log { margin-top: 30px; font-size: 18px; white-space: pre-line; }
  </style>
</head>
<body>
  <h2>🎙️ Speech to Text (Whisper Local)</h2>
  <button id="recordBtn">Start Recording</button>
  <div id="log">Press the button to record up to 20s of audio.</div>
  <script>
    const MAX_RECORDING_MS = 20000;
    const logEl = document.getElementById('log');
    const btn = document.getElementById('recordBtn');

    function resetButton() {
      btn.disabled = false;
      btn.textContent = "Start Recording";
    }

    async function recordAndSend() {
      // Disable first so a second click cannot start a parallel recording
      // while the permission prompt is still open.
      btn.disabled = true;

      let stream;
      let recorder;
      try {
        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        recorder = new MediaRecorder(stream);
      } catch (err) {
        // Permission denied, no input device, or an insecure context:
        // tell the user instead of failing silently.
        if (stream) stream.getTracks().forEach(track => track.stop());
        logEl.textContent = "Microphone error: " + err.message;
        resetButton();
        return;
      }

      const chunks = [];
      btn.textContent = "Recording...";
      logEl.textContent = "Listening for up to 20 seconds...";

      recorder.ondataavailable = e => chunks.push(e.data);
      recorder.onstop = async () => {
        // Release the microphone so the browser stops capturing audio.
        stream.getTracks().forEach(track => track.stop());
        logEl.textContent = "Processing...";
        const blob = new Blob(chunks, { type: 'audio/webm' });
        try {
          const arrayBuffer = await blob.arrayBuffer();
          const res = await fetch("/transcribe", {
            method: "POST",
            headers: { "Content-Type": "audio/webm" },
            body: arrayBuffer
          });
          const data = await res.json();
          // An empty string is a valid result (silence), so check the type
          // rather than truthiness.
          if (typeof data.text === "string") {
            logEl.textContent = "📝 Transcription:\\n" + (data.text || "(no speech detected)");
          } else {
            logEl.textContent = "Error: " + JSON.stringify(data);
          }
        } catch (err) {
          logEl.textContent = "Network error: " + err.message;
        } finally {
          resetButton();
        }
      };

      recorder.start();
      // Stop after the time limit unless the recorder has already stopped.
      setTimeout(() => {
        if (recorder.state !== "inactive") recorder.stop();
      }, MAX_RECORDING_MS);
    }

    btn.onclick = recordAndSend;
  </script>
</body>
</html>
"""
@app.get("/")
def index():
    """Serve the recorder page for the site root."""
    page_markup = HTML_PAGE
    return page_markup
@app.post("/transcribe")
def transcribe_audio():
    """Transcribe the audio carried in the request body and return JSON.

    The raw request body is written to a scratch file, converted by ffmpeg
    to a 16 kHz mono WAV, and passed to the module-level Whisper ``model``.

    Returns:
        ``{"text": ..., "language": ...}`` on success.
        ``({"error": ...}, 400)`` when the body is empty or ffmpeg exits
        with a non-zero status while converting it.
        ``({"error": ...}, 500)`` for any other failure.
    """
    audio_bytes = request.data
    if not audio_bytes:
        return jsonify({"error": "Request body is empty; expected audio data."}), 400

    try:
        # A private scratch directory holds both files and is deleted when
        # the with-block exits, including when conversion or transcription
        # raises, so failed requests no longer leave temp files behind.
        with tempfile.TemporaryDirectory() as workdir:
            webm_path = os.path.join(workdir, "input.webm")
            wav_path = os.path.join(workdir, "input.wav")
            with open(webm_path, "wb") as webm_file:
                webm_file.write(audio_bytes)

            # check=True turns a failed conversion into CalledProcessError
            # instead of letting transcription run on a missing WAV file.
            subprocess.run(
                ["ffmpeg", "-y", "-i", webm_path, "-ar", "16000", "-ac", "1", wav_path],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )

            segments, info = model.transcribe(wav_path, beam_size=1)
            # Join the segments before leaving the with-block: per the
            # faster-whisper docs they are produced lazily, so the WAV file
            # should still exist while they are consumed.
            text = " ".join(seg.text for seg in segments).strip()

        return jsonify({"text": text, "language": info.language})
    except subprocess.CalledProcessError:
        return jsonify({"error": "ffmpeg could not decode the uploaded audio."}), 400
    except Exception as e:
        # Top-level boundary: report any other failure to the client as JSON.
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    # Run the Flask development server on all interfaces, port 7860, when
    # this file is executed directly.
    app.run(host="0.0.0.0", port=7860)