# ai_omoo / app.py
# Pepguy's picture
# Update app.py
# 3bf4d41 verified
from flask import Flask, request, jsonify, send_from_directory
from faster_whisper import WhisperModel
import tempfile, os, subprocess
# WSGI application object; the route decorators further down register on it.
app = Flask(import_name=__name__)

# The "tiny" Whisper model is created a single time at import, on the CPU,
# so every request reuses the same instance.
model = WhisperModel(
    "tiny",
    device="cpu",
)
# --- HTML served directly ---
# Single-page recorder UI returned by the "/" route. The embedded script
# records up to 20 seconds of microphone audio with MediaRecorder, POSTs the
# WebM bytes to /transcribe and shows the JSON reply.
# NOTE: this is a regular (non-raw) Python string, so "\\n" below reaches the
# browser as the JavaScript escape "\n".
HTML_PAGE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Simple STT with 20s Timeout</title>
<style>
body { font-family: sans-serif; text-align: center; margin-top: 80px; }
button { padding: 10px 20px; font-size: 18px; border-radius: 10px; border: none; background: #4e8cff; color: white; cursor: pointer; }
#log { margin-top: 30px; font-size: 18px; white-space: pre-line; }
</style>
</head>
<body>
<h2>🎙️ Speech to Text (Whisper Local)</h2>
<button id="recordBtn">Start Recording</button>
<div id="log">Press the button to record up to 20s of audio.</div>
<script>
const logEl = document.getElementById('log');
const btn = document.getElementById('recordBtn');

async function recordAndSend() {
  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    // Permission denied or no input device: report it instead of failing silently.
    logEl.textContent = "Microphone error: " + err.message;
    return;
  }

  const recorder = new MediaRecorder(stream);
  const chunks = [];
  btn.disabled = true;
  btn.textContent = "Recording...";
  logEl.textContent = "Listening for up to 20 seconds...";

  recorder.ondataavailable = e => chunks.push(e.data);

  recorder.onstop = async () => {
    // Release the microphone once recording has ended.
    stream.getTracks().forEach(track => track.stop());
    logEl.textContent = "Processing...";
    try {
      const blob = new Blob(chunks, { type: 'audio/webm' });
      const arrayBuffer = await blob.arrayBuffer();
      const res = await fetch("/transcribe", {
        method: "POST",
        headers: { "Content-Type": "audio/webm" },
        body: arrayBuffer
      });
      const data = await res.json();
      // An empty transcription is a valid result, not an error.
      if (res.ok && typeof data.text === "string") {
        logEl.textContent = "📝 Transcription:\\n" + (data.text || "(no speech detected)");
      } else {
        logEl.textContent = "Error: " + JSON.stringify(data);
      }
    } catch (err) {
      logEl.textContent = "Network error: " + err.message;
    } finally {
      // Always give the button back, whatever happened above.
      btn.disabled = false;
      btn.textContent = "Start Recording";
    }
  };

  recorder.start();
  setTimeout(() => recorder.stop(), 20000); // stop after 20s
}

btn.onclick = recordAndSend;
</script>
</body>
</html>
"""
@app.get("/")
def index():
    """Serve the recorder page held in ``HTML_PAGE``."""
    page = HTML_PAGE
    return page
@app.post("/transcribe")
def transcribe_audio():
    """Transcribe the audio bytes sent in the request body.

    The body is written to a temporary ``.webm`` file, converted by ffmpeg
    to a 16 kHz mono WAV file, and passed to the module-level ``model``.

    Returns:
        A JSON response ``{"text": ..., "language": ...}`` on success;
        ``{"error": ...}`` with status 400 when the body is empty;
        ``{"error": ...}`` with status 500 when conversion or
        transcription fails.
    """
    audio_bytes = request.data
    if not audio_bytes:
        # Nothing to decode; skip spawning ffmpeg for an empty upload.
        return jsonify({"error": "Empty request body; expected audio data."}), 400

    try:
        # The temporary directory (and both files in it) is removed on exit
        # from the ``with`` block, including when ffmpeg or the model raises.
        with tempfile.TemporaryDirectory() as workdir:
            webm_path = os.path.join(workdir, "input.webm")
            wav_path = os.path.join(workdir, "input.wav")

            with open(webm_path, "wb") as webm_file:
                webm_file.write(audio_bytes)

            subprocess.run(
                ["ffmpeg", "-y", "-i", webm_path, "-ar", "16000", "-ac", "1", wav_path],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,  # surface a failed conversion instead of ignoring it
            )

            segments, info = model.transcribe(wav_path, beam_size=1)
            # Consume the segments while the WAV file still exists, in case
            # the returned iterable is evaluated lazily.
            text = " ".join(seg.text for seg in segments).strip()
            language = info.language

        return jsonify({"text": text, "language": language})
    except Exception as e:
        # Top-level boundary for this route: log the traceback, answer in JSON.
        app.logger.exception("Transcription failed")
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    # Direct execution: bind to every interface on port 7860.
    app.run(port=7860, host="0.0.0.0")