File size: 3,067 Bytes
3bf4d41
 
 
07221e5
0998987
e285d1f
3bf4d41
 
262b239
3bf4d41
 
0998987
 
262b239
3bf4d41
 
 
 
 
 
 
262b239
 
3bf4d41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262b239
3bf4d41
 
 
 
 
 
 
 
 
 
 
 
 
262b239
3bf4d41
 
262b239
 
0998987
 
3bf4d41
0998987
3bf4d41
0998987
3bf4d41
 
 
 
 
 
d204a95
3bf4d41
 
 
 
 
 
431bd15
3bf4d41
 
431bd15
3bf4d41
 
 
 
0998987
 
 
 
3bf4d41
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from flask import Flask, request, jsonify, send_from_directory
from faster_whisper import WhisperModel
import tempfile, os, subprocess

# Flask application object; the route decorators in this module register
# their handlers on it.
app = Flask(__name__)

# Load Whisper model once on startup (at module import) so that request
# handlers share this single instance instead of constructing one per request.
# Uses the "tiny" model on CPU.
model = WhisperModel("tiny", device="cpu")

# --- HTML served directly ---
# Single-page UI: records up to 20 seconds of microphone audio in the browser
# and POSTs the raw bytes to /transcribe, then shows the returned text.
# NOTE: this is a regular (non-raw) Python string, so "\\n" below reaches the
# browser as the two-character JavaScript escape sequence for a newline.
HTML_PAGE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Simple STT with 20s Timeout</title>
<style>
  body { font-family: sans-serif; text-align: center; margin-top: 80px; }
  button { padding: 10px 20px; font-size: 18px; border-radius: 10px; border: none; background: #4e8cff; color: white; cursor: pointer; }
  #log { margin-top: 30px; font-size: 18px; white-space: pre-line; }
</style>
</head>
<body>
  <h2>🎙️ Speech to Text (Whisper Local)</h2>
  <button id="recordBtn">Start Recording</button>
  <div id="log">Press the button to record up to 20s of audio.</div>

<script>
const logEl = document.getElementById('log');
const btn = document.getElementById('recordBtn');
const MAX_RECORDING_MS = 20000;

function resetButton() {
  btn.disabled = false;
  btn.textContent = "Start Recording";
}

async function recordAndSend() {
  // Disable before asking for the microphone so a second click during the
  // permission prompt cannot start a second recorder.
  btn.disabled = true;

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    // Permission denied, no input device, or an insecure (non-HTTPS) context.
    logEl.textContent = "Microphone error: " + err.message;
    resetButton();
    return;
  }

  const recorder = new MediaRecorder(stream);
  const chunks = [];

  btn.textContent = "Recording...";
  logEl.textContent = "Listening for up to 20 seconds...";

  recorder.ondataavailable = e => chunks.push(e.data);
  recorder.onstop = async () => {
    // Release the microphone so the browser's recording indicator turns off.
    stream.getTracks().forEach(track => track.stop());

    logEl.textContent = "Processing...";
    const blob = new Blob(chunks, { type: 'audio/webm' });
    const arrayBuffer = await blob.arrayBuffer();

    try {
      const res = await fetch("/transcribe", {
        method: "POST",
        headers: { "Content-Type": "audio/webm" },
        body: arrayBuffer
      });
      const data = await res.json();
      if (res.ok && typeof data.text === "string") {
        // An empty string is a valid result (e.g. silence), not an error.
        logEl.textContent = "📝 Transcription:\\n" + (data.text || "(no speech detected)");
      } else {
        logEl.textContent = "Error: " + JSON.stringify(data);
      }
    } catch (err) {
      logEl.textContent = "Network error: " + err.message;
    }
    resetButton();
  };

  recorder.start();
  setTimeout(() => recorder.stop(), MAX_RECORDING_MS); // stop after 20s
}

btn.onclick = recordAndSend;
</script>
</body>
</html>
"""

@app.get("/")
def index() -> str:
    """Serve the recorder page (the HTML_PAGE string) at the site root."""
    return HTML_PAGE

@app.post("/transcribe")
def transcribe_audio():
    """Transcribe the audio sent as the raw request body.

    The body is written to a scratch directory, converted by ffmpeg to
    16 kHz mono WAV, and passed to the module-level Whisper ``model``.

    Returns:
        A JSON response ``{"text": ..., "language": ...}`` on success.
        ``{"error": ...}`` with status 400 when the body is empty or ffmpeg
        cannot decode it, and with status 500 for any other failure.
    """
    audio_bytes = request.data
    if not audio_bytes:
        return jsonify({"error": "No audio data received"}), 400

    try:
        # The directory and both files inside it are removed when the block
        # exits, including when ffmpeg or transcription raises.
        with tempfile.TemporaryDirectory() as workdir:
            src_path = os.path.join(workdir, "input.webm")
            wav_path = os.path.join(workdir, "input.wav")

            with open(src_path, "wb") as src:
                src.write(audio_bytes)

            # check=True turns a failed conversion into CalledProcessError
            # instead of letting transcription fail later on a missing file.
            subprocess.run(
                ["ffmpeg", "-y", "-i", src_path, "-ar", "16000", "-ac", "1", wav_path],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )

            segments, info = model.transcribe(wav_path, beam_size=1)
            # Consume the segments while the WAV file still exists; they may
            # be produced lazily.
            text = " ".join(seg.text for seg in segments).strip()
            language = info.language

        return jsonify({"text": text, "language": language})
    except subprocess.CalledProcessError:
        return jsonify({"error": "Could not decode the uploaded audio"}), 400
    except Exception as e:
        # Top-level boundary of the request handler: report instead of crashing.
        return jsonify({"error": str(e)}), 500

# Start the Flask server only when this file is executed as a script, not when
# it is imported. Binds to all network interfaces (0.0.0.0) on port 7860.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)