Charan5775 commited on
Commit
f3dd1db
·
verified ·
1 Parent(s): 8b37a13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -41
app.py CHANGED
@@ -1,70 +1,120 @@
1
- from fastapi import FastAPI, WebSocket, UploadFile, File
 
 
2
  from pywhispercpp.model import Model
3
  import uvicorn
4
  import tempfile
5
  import os
6
  from time import time
7
 
8
- app = FastAPI(title="pyWhisperCPP API")
9
 
10
- model = Model('base.en')
 
 
 
 
 
 
 
11
 
 
 
 
12
 
13
- @app.get("/")
14
- def root():
15
- return {"status": "Whisper.cpp API is running!"}
16
 
 
 
 
 
 
 
17
 
 
 
18
  @app.post("/transcribe")
19
  async def transcribe(file: UploadFile = File(...)):
 
20
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp:
21
  temp.write(await file.read())
22
  temp.flush()
23
  audio_path = temp.name
24
 
25
- start = time()
26
- segments = model.transcribe(audio_path)
27
- text = " ".join([seg.text for seg in segments])
28
- elapsed = round(time() - start, 3)
29
- os.remove(audio_path)
30
 
31
- return {"text": text, "processing_time_seconds": elapsed}
 
 
 
 
 
32
 
33
 
34
- # ==========================================================
35
- # 🔥 Real-time Speech Recognition (WebSocket)
36
- # ==========================================================
37
  @app.websocket("/ws/transcribe_stream")
38
  async def websocket_transcription(websocket: WebSocket):
 
 
 
 
 
39
  await websocket.accept()
40
  buffer = b""
41
-
42
- while True:
43
- chunk = await websocket.receive_bytes()
44
- if chunk == b"__END__":
45
- break
46
-
47
- buffer += chunk
48
-
49
- # When buffer > 1 sec of audio, transcribe
50
- if len(buffer) >= 16000 * 2: # 16kHz * 2 bytes = 1 second PCM16
51
- # Save buffer temporarily
52
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp:
53
- temp.write(buffer)
54
- temp.flush()
55
- audio_path = temp.name
56
-
57
- segments = model.transcribe(audio_path)
58
- text = " ".join([seg.text for seg in segments])
59
-
60
- await websocket.send_text(text)
61
- buffer = b"" # clear for next batch
62
-
63
- os.remove(audio_path)
64
-
65
- await websocket.send_text("stream_end")
66
- await websocket.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  if __name__ == "__main__":
 
70
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ from fastapi import FastAPI, UploadFile, File, WebSocket, WebSocketDisconnect
2
+ from fastapi.responses import HTMLResponse, JSONResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
  from pywhispercpp.model import Model
5
  import uvicorn
6
  import tempfile
7
  import os
8
  from time import time
9
 
10
app = FastAPI(title="pyWhisperCPP Streaming API")

# Allow CORS (useful if you host frontend separately, but fine on Spaces too)
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests (the CORS spec forbids the
# wildcard origin together with credentials) — confirm whether credentialed
# cross-origin requests are actually needed here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load Whisper.cpp model ONCE at import time so every request reuses it
# instead of paying the model-load cost per call.
# OPTIONS: 'tiny.en', 'base.en', etc.
model = Model("base.en")
24
 
 
 
 
25
 
26
+ # ---------- Simple HTML frontend ----------
27
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the static ``index.html`` frontend.

    Returns:
        HTMLResponse with the file contents, or a 404 HTML page when
        ``index.html`` was not deployed alongside the app (previously this
        surfaced as an unhandled ``FileNotFoundError`` -> bare 500).
    """
    try:
        # Read fresh on every request so edits to index.html show up
        # without restarting the server.
        with open("index.html", "r", encoding="utf-8") as f:
            return HTMLResponse(f.read())
    except FileNotFoundError:
        # Fail gracefully instead of a 500 when the frontend file is missing.
        return HTMLResponse("<h1>index.html not found</h1>", status_code=404)
32
 
33
+
34
+ # ---------- Normal file upload transcription ----------
35
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with Whisper.cpp.

    The upload is spooled to a temporary ``.wav`` file (whisper.cpp reads
    from a filesystem path), transcribed, and the temp file is removed
    even when transcription raises.

    Returns:
        dict with the joined transcript text and wall-clock processing time.
    """
    # delete=False so the model can reopen the path after this handle closes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(await file.read())
        tmp.flush()
        wav_path = tmp.name

    try:
        t0 = time()
        transcript = " ".join(segment.text for segment in model.transcribe(wav_path))
        return {
            "text": transcript,
            "processing_time_seconds": round(time() - t0, 3),
        }
    finally:
        # Always clean up the spooled file, success or failure.
        os.remove(wav_path)
55
 
56
 
57
+ # ---------- WebSocket streaming transcription ----------
 
 
58
@app.websocket("/ws/transcribe_stream")
async def websocket_transcription(websocket: WebSocket):
    """
    Receives binary audio chunks (WebM/Opus) from the browser via WebSocket,
    periodically transcribes the buffered audio with Whisper.cpp,
    and sends back partial text.

    Protocol:
      * binary frames   -> audio data, appended to an internal buffer
      * text "__END__"  -> client finished sending; reply "[stream ended]"
    """
    await websocket.accept()
    buffer = b""
    MIN_CHUNK_SIZE = 40_000  # bytes buffered before running a transcription (tune this)

    def _transcribe_buffer(data: bytes) -> str:
        # Spool the buffer to a temp file (the model reads from a path),
        # transcribe it, and always remove the file afterwards.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp:
            temp.write(data)
            temp.flush()
            audio_path = temp.name
        try:
            segments = model.transcribe(audio_path)
            return " ".join(seg.text for seg in segments).strip()
        finally:
            os.remove(audio_path)

    try:
        while True:
            message = await websocket.receive()

            # BUG FIX: websocket.receive() does NOT raise on client
            # disconnect — it returns a {"type": "websocket.disconnect"}
            # message. The old code fell through and called receive()
            # again, which raises RuntimeError inside Starlette.
            if message.get("type") == "websocket.disconnect":
                return  # socket is gone; nothing more to send

            # Text frames are control messages.
            if message.get("text") is not None:
                if message["text"] == "__END__":
                    break  # client finished the stream
                continue  # ignore other text controls for now

            # Binary frames carry audio data.
            chunk = message.get("bytes")
            if not chunk:
                continue

            buffer += chunk

            # When enough audio is collected, emit a partial transcript.
            # NOTE(review): after the first flush, later buffers lack the
            # WebM container header, so the decoder may reject them —
            # consider retaining the header bytes or streaming raw PCM.
            if len(buffer) >= MIN_CHUNK_SIZE:
                text = _transcribe_buffer(buffer)
                if text:
                    await websocket.send_text(text)
                buffer = b""  # clear (or keep a tail if you want overlap)

        # Clean end-of-stream acknowledgement.
        await websocket.send_text("[stream ended]")
    except WebSocketDisconnect:
        # Raised by send_*/receive_* helpers if the client drops mid-call.
        pass
    finally:
        # close() on an already-disconnected socket raises; swallow that.
        try:
            await websocket.close()
        except RuntimeError:
            pass
116
 
117
 
118
if __name__ == "__main__":
    # For local testing. On Spaces, you don't usually run uvicorn manually.
    # Port 7860 is the port Hugging Face Spaces expects the app to bind to.
    uvicorn.run(app, host="0.0.0.0", port=7860)