Spaces:
Sleeping
Sleeping
File size: 2,622 Bytes
15f3e71 7049779 15f3e71 042b4b4 b364fbe 15f3e71 f3dd1db 042b4b4 15f3e71 042b4b4 7049779 042b4b4 7049779 15f3e71 393ac47 15f3e71 0c3824d 15f3e71 042b4b4 15f3e71 b364fbe 15f3e71 042b4b4 b364fbe 042b4b4 b364fbe b5db09d b364fbe b5db09d b364fbe b5db09d b364fbe b5db09d b364fbe b5db09d b364fbe b5db09d b364fbe 042b4b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
from fastapi import FastAPI, UploadFile, File, WebSocket
from pywhispercpp.model import Model
import uvicorn, tempfile, os
from time import time
import wave
# FastAPI application plus the shared whisper.cpp model instance.
# The model is loaded once at import time so every request reuses it
# instead of paying the load cost per call.
app = FastAPI(title="pyWhisperCPP API")
model = Model("base.en")  # "base.en" preset — presumably the English base model; confirm with pywhispercpp docs
@app.get("/")
def root():
    """Health-check endpoint confirming the API server is up."""
    payload = dict(status="Whisper.cpp API is running!")
    return payload
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return its text.

    Writes the multipart upload to a temporary WAV file, runs it through
    the shared whisper.cpp model, and returns the concatenated segment
    text as ``{"text": ...}``.

    The original leaked the temp file whenever transcription raised and
    computed an unused ``start = time()``; both are fixed here.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp:
        temp.write(await file.read())
        temp.flush()
        audio_path = temp.name
    try:
        segments = model.transcribe(audio_path)
        text = " ".join(seg.text for seg in segments)
    finally:
        # Always remove the temp file, even if model.transcribe raises.
        os.remove(audio_path)
    return {"text": text}
# ================================
# 🔥 Real-time streaming endpoint
# ================================
def save_pcm16_as_wav(data: bytes, sample_rate: int = 16000, channels: int = 1) -> str:
    """Convert a raw PCM16 byte stream into a valid WAV file on disk.

    Args:
        data: Raw little-endian 16-bit PCM samples.
        sample_rate: Samples per second (default 16 kHz).
        channels: Number of interleaved channels (default mono).

    Returns:
        Path of the newly created temporary ``.wav`` file. The caller
        is responsible for deleting it.
    """
    # The original created a NamedTemporaryFile and never closed its file
    # object, leaking one fd per call (the data was written through a
    # second handle from wave.open). mkstemp + immediate close avoids that.
    fd, path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    with wave.open(path, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(2)  # PCM16 = 2 bytes per sample
        wf.setframerate(sample_rate)
        wf.writeframes(data)
    return path
@app.websocket("/ws/live")
async def websocket_live(websocket: WebSocket):
    """Stream PCM16 audio over a WebSocket and send back live transcripts.

    Protocol (as implemented here):
      * binary frames: raw little-endian PCM16 audio, assumed 16 kHz mono
        — TODO confirm against the client
      * text frame "__END__": client is done sending

    Audio is buffered and transcribed in ~2-second chunks; each non-empty
    transcript is sent back as a text frame, followed by a final "[END]"
    marker once the client ends the stream.
    """
    await websocket.accept()
    print("Client connected!")
    buffer = b""
    SAMPLE_RATE = 16000
    # 2 seconds of audio: rate * 2 bytes/sample * 2 seconds.
    MIN_PCM_SIZE = SAMPLE_RATE * 2 * 2
    try:
        while True:
            data = await websocket.receive()
            # Text frames carry control messages.
            if data.get("text"):
                if data["text"] == "__END__":
                    break
                continue
            # Binary frames carry raw PCM16 audio.
            chunk = data.get("bytes", b"")
            if not chunk:
                continue
            buffer += chunk
            if len(buffer) >= MIN_PCM_SIZE:
                text = _transcribe_pcm(buffer, SAMPLE_RATE)
                buffer = b""
                if text:
                    await websocket.send_text(text)
        # BUG FIX: the original silently discarded any audio still buffered
        # when "__END__" arrived; flush the tail before signalling the end.
        if buffer:
            text = _transcribe_pcm(buffer, SAMPLE_RATE)
            if text:
                await websocket.send_text(text)
        await websocket.send_text("[END]")
    except Exception as e:
        print("WebSocket Error:", e)
    finally:
        # close() raises if the peer already disconnected; don't let the
        # cleanup path blow up on it.
        try:
            await websocket.close()
        except Exception:
            pass
        print("Client disconnected")


def _transcribe_pcm(pcm: bytes, sample_rate: int) -> str:
    """Write *pcm* to a temp WAV, transcribe it, and clean up the file."""
    print(f"Transcribing {len(pcm)} bytes")
    # Convert PCM16 → WAV with header
    audio_path = save_pcm16_as_wav(pcm, sample_rate)
    try:
        segments = model.transcribe(audio_path)
        return " ".join(seg.text for seg in segments).strip()
    finally:
        # Remove the per-chunk temp WAV even if transcription raises
        # (the original leaked it on failure).
        os.remove(audio_path)
# Script entry point: launch the API with uvicorn on all interfaces,
# port 7860 (the conventional HuggingFace Spaces port).
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
|