File size: 2,622 Bytes
15f3e71
7049779
15f3e71
042b4b4
b364fbe
15f3e71
f3dd1db
042b4b4
15f3e71
 
 
042b4b4
7049779
042b4b4
7049779
 
 
 
 
15f3e71
 
393ac47
15f3e71
0c3824d
15f3e71
042b4b4
15f3e71
 
 
b364fbe
 
 
 
 
 
 
 
 
 
 
 
15f3e71
 
042b4b4
b364fbe
 
042b4b4
b364fbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5db09d
b364fbe
 
b5db09d
b364fbe
 
b5db09d
b364fbe
 
b5db09d
b364fbe
b5db09d
b364fbe
 
b5db09d
b364fbe
 
 
042b4b4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from fastapi import FastAPI, UploadFile, File, WebSocket
from pywhispercpp.model import Model
import uvicorn, tempfile, os
from time import time
import wave

# FastAPI application exposing Whisper.cpp speech-to-text over HTTP
# (POST /transcribe) and a WebSocket streaming endpoint (/ws/live).
app = FastAPI(title="pyWhisperCPP API")
# Whisper model loaded once at import time and shared by all endpoints.
# NOTE(review): model inference is synchronous; concurrent requests will
# serialize on this single instance — confirm that is acceptable.
model = Model("base.en")

@app.get("/")
def root():
    """Health-check endpoint: confirms the API process is alive."""
    payload = {"status": "Whisper.cpp API is running!"}
    return payload

@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return the recognized text.

    The upload is spooled to a temporary .wav file because the model API
    takes a filesystem path rather than an in-memory buffer.

    Args:
        file: Uploaded audio file (expected to be WAV-compatible —
              TODO confirm the model accepts other containers).

    Returns:
        dict with a single "text" key holding the joined transcription.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp:
        temp.write(await file.read())
        temp.flush()
        audio_path = temp.name

    try:
        segments = model.transcribe(audio_path)
        text = " ".join(seg.text for seg in segments)
    finally:
        # Clean up even when transcription raises; the original leaked the
        # temp file on error. (Also removed the unused `start = time()`.)
        os.remove(audio_path)

    return {"text": text}

# ================================
# 🔥 Real-time streaming endpoint
# ================================

def save_pcm16_as_wav(data: bytes, sample_rate: int = 16000, channels: int = 1) -> str:
    """Convert a raw PCM16 byte stream into a valid WAV file on disk.

    Args:
        data: Raw little-endian 16-bit PCM samples (interleaved if stereo).
        sample_rate: Sampling rate in Hz (default 16 kHz).
        channels: Number of channels (default mono).

    Returns:
        Path of the temporary .wav file; the caller is responsible for
        deleting it.
    """
    temp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    # Close the handle before re-opening the path with wave.open: keeping
    # it open leaks a file descriptor per call and fails outright on
    # Windows, where the same file cannot be opened twice.
    temp.close()
    with wave.open(temp.name, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(2)  # PCM16 = 2 bytes per sample
        wf.setframerate(sample_rate)
        wf.writeframes(data)
    return temp.name


@app.websocket("/ws/live")
async def websocket_live(websocket: WebSocket):
    """Live transcription over a WebSocket.

    Protocol, as implemented by this handler (confirm against the client):
      * binary frames carry raw PCM16 audio, assumed 16 kHz mono;
      * the text frame "__END__" ends the session;
      * transcribed text is sent back as text frames, followed by "[END]".
    """
    await websocket.accept()
    print("Client connected!")

    # Accumulates raw PCM bytes until at least MIN_PCM_SIZE are buffered.
    buffer = b""
    SAMPLE_RATE = 16000
    MIN_PCM_SIZE = SAMPLE_RATE * 2 * 2  # 2 seconds PCM16

    try:
        while True:
            # receive() returns a dict that may carry "text" or "bytes".
            data = await websocket.receive()

            # Text (control) frames: only "__END__" is meaningful; any
            # other text frame is silently ignored.
            if data.get("text"):
                if data["text"] == "__END__":
                    break
                continue

            # Binary PCM audio
            chunk = data.get("bytes", b"")
            if not chunk:
                continue

            buffer += chunk

            if len(buffer) >= MIN_PCM_SIZE:
                print(f"Transcribing {len(buffer)} bytes")

                # Convert PCM16 → WAV with header
                audio_path = save_pcm16_as_wav(buffer, SAMPLE_RATE)

                segments = model.transcribe(audio_path)
                text = " ".join(seg.text for seg in segments).strip()

                os.remove(audio_path)
                # Reset rather than slide the window: each ~2 s chunk is
                # transcribed independently.
                buffer = b""

                if text:
                    await websocket.send_text(text)

        # NOTE(review): any tail audio shorter than MIN_PCM_SIZE left in
        # `buffer` at "__END__" is discarded, not transcribed — confirm
        # this is intentional.
        await websocket.send_text("[END]")

    except Exception as e:
        # Broad catch so a client disconnect mid-receive doesn't crash the
        # server; the error is only logged.
        print("WebSocket Error:", e)

    finally:
        await websocket.close()
        print("Client disconnected")

if __name__ == "__main__":
    # Launch a development server on all interfaces when run as a script.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)