Charan5775 committed on
Commit
15f3e71
·
verified ·
1 Parent(s): b5db09d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -131
app.py CHANGED
@@ -1,157 +1,59 @@
1
- from fastapi import FastAPI, UploadFile, File, WebSocket, WebSocketDisconnect
2
- from fastapi.responses import HTMLResponse, JSONResponse
3
- from fastapi.middleware.cors import CORSMiddleware
4
  from pywhispercpp.model import Model
5
- import uvicorn
6
- import tempfile
7
- import os
8
  from time import time
9
 
10
- app = FastAPI(title="pyWhisperCPP Streaming API")
11
-
12
- # Allow CORS (useful if you host frontend separately, but fine on Spaces too)
13
- app.add_middleware(
14
- CORSMiddleware,
15
- allow_origins=["*"],
16
- allow_credentials=True,
17
- allow_methods=["*"],
18
- allow_headers=["*"],
19
- )
20
-
21
- # Load Whisper.cpp model ONCE
22
- # OPTIONS: 'tiny.en', 'base.en', etc.
23
  model = Model("base.en")
24
 
 
 
 
25
 
26
- # ---------- Simple HTML frontend ----------
27
- @app.get("/", response_class=HTMLResponse)
28
- async def index():
29
- # Serve the index.html file
30
- with open("index.html", "r", encoding="utf-8") as f:
31
- return HTMLResponse(f.read())
32
-
33
-
34
- # ---------- Normal file upload transcription ----------
35
  @app.post("/transcribe")
36
  async def transcribe(file: UploadFile = File(...)):
37
- # Save uploaded audio temporarily
38
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp:
39
  temp.write(await file.read())
40
  temp.flush()
41
  audio_path = temp.name
42
 
43
- try:
44
- start = time()
45
- segments = model.transcribe(audio_path)
46
- text = " ".join(seg.text for seg in segments)
47
- elapsed = round(time() - start, 3)
48
-
49
- return {
50
- "text": text,
51
- "processing_time_seconds": elapsed
52
- }
53
- finally:
54
- os.remove(audio_path)
55
 
 
56
 
57
- # ---------- WebSocket streaming transcription ----------
58
- @app.websocket("/ws/transcribe_stream")
59
- async def websocket_transcription(websocket: WebSocket):
60
- """
61
- Receives binary audio chunks (WebM/Opus) from the browser via WebSocket,
62
- periodically transcribes the buffered audio with Whisper.cpp,
63
- and sends back partial text.
64
- """
65
  await websocket.accept()
66
  buffer = b""
67
- MIN_CHUNK_SIZE = 40_000 # bytes before running a transcription (tune this)
68
-
69
- try:
70
- while True:
71
- message = await websocket.receive()
72
-
73
- # Handle text messages (control)
74
- if "text" in message and message["text"] is not None:
75
- text_msg = message["text"]
76
- if text_msg == "__END__":
77
- # Finish stream
78
- break
79
- # Ignore other text controls for now
80
- continue
81
-
82
- # Handle binary audio data
83
- chunk = message.get("bytes")
84
- if not chunk:
85
- continue
86
-
87
- buffer += chunk
88
-
89
- # When enough audio collected, transcribe
90
- if len(buffer) >= MIN_CHUNK_SIZE:
91
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp:
92
- temp.write(buffer)
93
- temp.flush()
94
- audio_path = temp.name
95
-
96
- try:
97
- segments = model.transcribe(audio_path)
98
- text = " ".join(seg.text for seg in segments).strip()
99
- finally:
100
- os.remove(audio_path)
101
-
102
- # Send partial transcript to client
103
- if text:
104
- await websocket.send_text(text)
105
-
106
- # Clear buffer (or keep tail if you want overlap)
107
- buffer = b""
108
-
109
- # End-of-stream message
110
- await websocket.send_text("[stream ended]")
111
- except WebSocketDisconnect:
112
- # Client disconnected
113
- pass
114
- finally:
115
- await websocket.close()
116
-
117
- @app.websocket("/ws/transcribe_pcm")
118
- async def websocket_transcription_pcm(websocket: WebSocket):
119
- await websocket.accept()
120
- buffer = b""
121
- SAMPLE_RATE = 16000
122
- MIN_PCM_SIZE = SAMPLE_RATE * 2 * 3 # 3 seconds buffer
123
-
124
- try:
125
- while True:
126
- chunk = await websocket.receive_bytes()
127
-
128
- # If end control message (optional)
129
- if chunk == b"__END__":
130
- break
131
 
132
- buffer += chunk
 
 
 
133
 
134
- if len(buffer) >= MIN_PCM_SIZE:
135
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp:
136
- temp.write(buffer)
137
- temp.flush()
138
- audio_path = temp.name
 
139
 
140
- segments = model.transcribe(audio_path)
141
- text = " ".join(seg.text for seg in segments).strip()
142
- if text:
143
- await websocket.send_text(text)
144
 
145
- buffer = b""
146
- os.remove(audio_path)
 
147
 
148
- await websocket.send_text("[stream ended]")
149
- except:
150
- pass
151
- finally:
152
- await websocket.close()
153
 
154
 
155
  if __name__ == "__main__":
156
- # For local testing. On Spaces, you don't usually run uvicorn manually.
157
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
import os
import tempfile
from time import time

import uvicorn
from fastapi import FastAPI, File, UploadFile, WebSocket, WebSocketDisconnect
from pywhispercpp.model import Model
5
 
6
# FastAPI application instance; served by uvicorn in the __main__ guard below.
app = FastAPI(title="pyWhisperCPP API")

# Load the whisper.cpp model once at import time so every request reuses it.
# Other sizes ('tiny.en', 'small.en', ...) can be substituted here.
model = Model("base.en")
8
 
9
@app.get("/")
def root():
    """Health-check endpoint confirming the API is up."""
    status_payload = {"status": "Whisper.cpp API is running!"}
    return status_payload
12
 
 
 
 
 
 
 
 
 
 
13
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with whisper.cpp.

    Returns a JSON object with the transcribed text and the processing time
    in seconds.
    """
    # Persist the upload to a temp file because model.transcribe wants a path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp:
        temp.write(await file.read())
        temp.flush()
        audio_path = temp.name

    try:
        start = time()
        segments = model.transcribe(audio_path)
        # BUG FIX: original read `for seg in segments]` — a stray ']' that is
        # a syntax error.
        text = " ".join(seg.text for seg in segments)
        elapsed = round(time() - start, 3)
    finally:
        # Always remove the temp file, even when transcription raises;
        # the original leaked it on any exception.
        os.remove(audio_path)

    # `start` was previously dead code; report the timing (backward-compatible
    # added key — "text" is unchanged).
    return {"text": text, "processing_time_seconds": elapsed}
26
 
27
+ # ================================
28
+ # 🔥 Real-time streaming endpoint
29
+ # ================================
30
@app.websocket("/ws/live")
async def websocket_live(websocket: WebSocket):
    """Real-time transcription over a WebSocket.

    The client streams binary audio chunks; once roughly one second of audio
    has accumulated it is written to a temp file, transcribed, and the
    partial text is sent back. A literal b"__END__" message ends the stream.
    """
    await websocket.accept()
    buffer = b""
    # ~1 second of 16 kHz mono PCM16 audio (16000 samples * 2 bytes).
    MIN_BUFFER_BYTES = 32000

    try:
        while True:
            data = await websocket.receive_bytes()
            if data == b"__END__":
                break

            buffer += data
            if len(buffer) > MIN_BUFFER_BYTES:
                # NOTE(review): raw chunks are written under a .wav suffix
                # without a RIFF header — confirm the client sends WAV-framed
                # audio or whisper.cpp may fail to decode it.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp:
                    temp.write(buffer)
                    temp.flush()
                    audio_path = temp.name

                try:
                    segments = model.transcribe(audio_path)
                    # BUG FIX: original read `for seg in segments]` — a stray
                    # ']' that is a syntax error.
                    text = " ".join(seg.text for seg in segments)
                finally:
                    # Remove the temp file even if transcription raises;
                    # the original leaked it on any exception.
                    os.remove(audio_path)

                await websocket.send_text(text)
                buffer = b""

        await websocket.send_text("[END]")
    except WebSocketDisconnect:
        # Client dropped mid-stream; the original let this propagate out of
        # the handler. Nothing else to do.
        pass
    finally:
        # Guarantee the socket is closed on every exit path.
        await websocket.close()
 
 
 
56
 
57
 
58
if __name__ == "__main__":
    # Local entry point; hosting platforms typically launch the ASGI server
    # themselves, so this only runs when the file is executed directly.
    uvicorn.run(app, host="0.0.0.0", port=7860)