# ai_omoo
#
# Sleeping
#
# File size: 3,067 Bytes

from flask import Flask, request, jsonify, send_from_directory
from faster_whisper import WhisperModel
import tempfile, os, subprocess

# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)

# Load the Whisper model once at import time so every request reuses the same
# instance ("tiny" model size, running on CPU).
model = WhisperModel("tiny", device="cpu")

# --- Single-page recorder UI, returned verbatim by the index route ---
# The page records up to 20 seconds of microphone audio in the browser and
# POSTs the raw bytes to /transcribe. Note that "\\n" below is a Python escape
# that yields the two-character JavaScript escape "\n" in the served page.
HTML_PAGE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Simple STT with 20s Timeout</title>
<style>
  body { font-family: sans-serif; text-align: center; margin-top: 80px; }
  button { padding: 10px 20px; font-size: 18px; border-radius: 10px; border: none; background: #4e8cff; color: white; cursor: pointer; }
  #log { margin-top: 30px; font-size: 18px; white-space: pre-line; }
</style>
</head>
<body>
  <h2>🎙️ Speech to Text (Whisper Local)</h2>
  <button id="recordBtn">Start Recording</button>
  <div id="log">Press the button to record up to 20s of audio.</div>

<script>
const logEl = document.getElementById('log');
const btn = document.getElementById('recordBtn');

function resetButton() {
  btn.disabled = false;
  btn.textContent = "Start Recording";
}

async function recordAndSend() {
  // Disable immediately so a second click cannot start a parallel recording
  // while the permission prompt is still open.
  btn.disabled = true;

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    // Permission denied or no input device: report it instead of failing silently.
    logEl.textContent = "Microphone error: " + err.message;
    resetButton();
    return;
  }

  const recorder = new MediaRecorder(stream);
  const chunks = [];

  btn.textContent = "Recording...";
  logEl.textContent = "Listening for up to 20 seconds...";

  recorder.ondataavailable = e => chunks.push(e.data);
  recorder.onstop = async () => {
    // Release the microphone; otherwise the tracks stay live after recording.
    stream.getTracks().forEach(track => track.stop());

    logEl.textContent = "Processing...";
    const blob = new Blob(chunks, { type: 'audio/webm' });
    const arrayBuffer = await blob.arrayBuffer();

    try {
      const res = await fetch("/transcribe", {
        method: "POST",
        headers: { "Content-Type": "audio/webm" },
        body: arrayBuffer
      });
      const data = await res.json();
      // An empty string is a valid result (silence), not an error.
      if (typeof data.text === "string") logEl.textContent = "📝 Transcription:\\n" + (data.text || "(no speech detected)");
      else logEl.textContent = "Error: " + JSON.stringify(data);
    } catch (err) {
      logEl.textContent = "Network error: " + err.message;
    }
    resetButton();
  };

  recorder.start();
  setTimeout(() => recorder.stop(), 20000); // stop after 20s
}

btn.onclick = recordAndSend;
</script>
</body>
</html>
"""

@app.get("/")
def index():
    """Serve the recorder page held in ``HTML_PAGE`` for ``GET /``."""
    page = HTML_PAGE
    return page

@app.post("/transcribe")
def transcribe_audio():
    """Transcribe the audio bytes sent in the request body.

    The raw body is written to a temporary ``.webm`` file, converted by
    ffmpeg to a 16 kHz mono ``.wav`` file, and passed to the module-level
    Whisper ``model``. Both temporary files are removed before the function
    returns, whether or not transcription succeeded.

    Returns:
        A JSON response ``{"text": ..., "language": ...}`` on success;
        ``{"error": ...}`` with status 400 when the body is empty, or with
        status 500 when conversion or transcription fails.
    """
    audio_bytes = request.data
    if not audio_bytes:
        return jsonify({"error": "No audio data received"}), 400

    tmp_path = None
    wav_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
            tmp_path = tmp.name
            tmp.write(audio_bytes)

        # Swap only the extension; str.replace would also rewrite any
        # ".webm" that happened to appear earlier in the path.
        wav_path = os.path.splitext(tmp_path)[0] + ".wav"

        # check=True makes a failed conversion raise here instead of
        # surfacing later as a confusing error from the model.
        subprocess.run(
            ["ffmpeg", "-y", "-i", tmp_path, "-ar", "16000", "-ac", "1", wav_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )

        segments, info = model.transcribe(wav_path, beam_size=1)
        # Consume the segments while the wav file still exists (cleanup
        # happens in the finally clause below).
        text = " ".join(seg.text for seg in segments).strip()

        return jsonify({"text": text, "language": info.language})
    except subprocess.CalledProcessError:
        return jsonify({"error": "ffmpeg could not decode the uploaded audio"}), 500
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup on every exit path so temp files never pile up.
        for path in (tmp_path, wav_path):
            if path is None:
                continue
            try:
                os.remove(path)
            except OSError:
                # Already gone or never created (e.g. ffmpeg failed early).
                pass

if __name__ == "__main__":
    # Run directly as a script: listen on every interface, port 7860.
    app.run(port=7860, host="0.0.0.0")