MFF212 commited on
Commit
37dec86
·
verified ·
1 Parent(s): 153d4fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -194
app.py CHANGED
@@ -1,219 +1,148 @@
1
  #!/usr/bin/env python3
2
  #
3
- # Copyright (c) 2025, Daily
4
- #
5
- # SPDX-License-Identifier: BSD 2-Clause License
6
  #
7
 
8
- import argparse
9
- import datetime
10
- import io
11
- import json
12
  import os
13
- import sys
14
- import wave
15
-
16
- import aiofiles
 
17
  import uvicorn
18
- from dotenv import load_dotenv
19
- from fastapi import FastAPI, WebSocket
20
- from fastapi.middleware.cors import CORSMiddleware
21
- from loguru import logger
22
-
23
- from pipecat.audio.vad.silero import SileroVADAnalyzer
24
- from pipecat.pipeline.pipeline import Pipeline
25
- from pipecat.pipeline.runner import PipelineRunner
26
- from pipecat.pipeline.task import PipelineParams, PipelineTask
27
- from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
28
- from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
29
- from pipecat.serializers.exotel import ExotelFrameSerializer
30
- from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
31
- from pipecat.services.deepgram.stt import DeepgramSTTService
32
- from pipecat.services.openai.llm import OpenAILLMService
33
- from pipecat.transports.network.fastapi_websocket import (
34
- FastAPIWebsocketParams,
35
- FastAPIWebsocketTransport,
36
- )
37
-
38
- # Load environment variables
39
- load_dotenv(override=True)
40
-
41
- # Configure logger
42
- logger.remove(0)
43
- logger.add(sys.stderr, level="DEBUG")
44
 
45
  # Create FastAPI app
46
- app = FastAPI(title="Pipecat Exotel Voice Chatbot", version="1.0.0")
47
-
48
- # Add CORS middleware
49
- app.add_middleware(
50
- CORSMiddleware,
51
- allow_origins=["*"], # Allow all origins for testing
52
- allow_credentials=True,
53
- allow_methods=["*"],
54
- allow_headers=["*"],
55
- )
56
-
57
-
58
- async def save_audio(server_name: str, audio: bytes, sample_rate: int, num_channels: int):
59
- """Save audio recording to file"""
60
- if len(audio) > 0:
61
- filename = (
62
- f"{server_name}_recording_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
63
- )
64
- with io.BytesIO() as buffer:
65
- with wave.open(buffer, "wb") as wf:
66
- wf.setsampwidth(2)
67
- wf.setnchannels(num_channels)
68
- wf.setframerate(sample_rate)
69
- wf.writeframes(audio)
70
- async with aiofiles.open(filename, "wb") as file:
71
- await file.write(buffer.getvalue())
72
- logger.info(f"Audio saved to {filename}")
73
- else:
74
- logger.info("No audio data to save")
75
-
76
-
77
- async def run_bot(websocket_client: WebSocket, call_id: str, testing: bool):
78
- """Run the voice chatbot pipeline"""
79
- transport = FastAPIWebsocketTransport(
80
- websocket=websocket_client,
81
- params=FastAPIWebsocketParams(
82
- audio_in_enabled=True,
83
- audio_out_enabled=True,
84
- add_wav_header=False,
85
- vad_enabled=True,
86
- vad_analyzer=SileroVADAnalyzer(),
87
- vad_audio_passthrough=True,
88
- serializer=ExotelFrameSerializer(stream_sid=call_id),
89
- ),
90
- )
91
-
92
- # Initialize services
93
- llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
94
- stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"), audio_passthrough=True)
95
- tts = ElevenLabsTTSService(
96
- api_key=os.getenv("ELEVEN_API_KEY"),
97
- voice_id=os.getenv("ELEVEN_VOICE_ID"),
98
- )
99
-
100
- # System message
101
- messages = [
102
- {
103
- "role": "system",
104
- "content": "You are a helpful assistant named Tasha. Your output will be converted to audio so don't include special characters in your answers. Respond with a short sentence.",
105
- },
106
- ]
107
-
108
- context = OpenAILLMContext(messages)
109
- context_aggregator = llm.create_context_aggregator(context)
110
- audiobuffer = AudioBufferProcessor(user_continuous_stream=not testing)
111
-
112
- # Create pipeline
113
- pipeline = Pipeline(
114
- [
115
- transport.input(), # WebSocket input from client
116
- stt, # Speech-To-Text
117
- context_aggregator.user(),
118
- llm, # LLM
119
- tts, # Text-To-Speech
120
- transport.output(), # WebSocket output to client
121
- audiobuffer, # Used to buffer the audio in the pipeline
122
- context_aggregator.assistant(),
123
- ]
124
- )
125
-
126
- task = PipelineTask(
127
- pipeline,
128
- params=PipelineParams(
129
- audio_in_sample_rate=8000,
130
- audio_out_sample_rate=8000,
131
- allow_interruptions=True,
132
- ),
133
- )
134
-
135
- @transport.event_handler("on_client_connected")
136
- async def on_client_connected(transport, client):
137
- # Start recording
138
- await audiobuffer.start_recording()
139
- # Kick off the conversation
140
- messages.append({"role": "system", "content": "Please introduce yourself to the user."})
141
- await task.queue_frames([context_aggregator.user().get_context_frame()])
142
-
143
- @transport.event_handler("on_client_disconnected")
144
- async def on_client_disconnected(transport, client):
145
- await task.cancel()
146
-
147
- @audiobuffer.event_handler("on_audio_data")
148
- async def on_audio_data(buffer, audio, sample_rate, num_channels):
149
- server_name = f"server_{websocket_client.client.port}"
150
- await save_audio(server_name, audio, sample_rate, num_channels)
151
-
152
- # Run the pipeline
153
- runner = PipelineRunner(handle_sigint=False, force_gc=True)
154
- await runner.run(task)
155
-
156
 
157
  @app.get("/health")
158
  async def health_check():
159
  """Health check endpoint"""
160
- return {"status": "healthy", "message": "Pipecat Exotel Voice Chatbot is running"}
161
-
162
 
163
  @app.websocket("/media")
164
- async def media_endpoint(websocket: WebSocket):
165
- """WebSocket endpoint for Exotel media streaming"""
166
  await websocket.accept()
167
- print("[*] Exotel WebSocket connection accepted")
168
 
169
  try:
170
  while True:
171
  data = await websocket.receive_text()
172
  message = json.loads(data)
173
- print(f"\n[*] Received Exotel message: {json.dumps(message, indent=2)}")
174
 
175
- # Handle start event to get call information
176
- if message.get('event') == 'start':
177
- stream_sid = message.get('streamSid', 'default')
178
- print(f"Starting bot for stream SID: {stream_sid}")
179
- await run_bot(websocket, stream_sid, app.state.testing)
180
- break
181
 
182
  except Exception as e:
183
  print(f"[!] Error in media endpoint: {str(e)}")
184
 
185
-
186
- def main():
187
- """Main function to run the server"""
188
- parser = argparse.ArgumentParser(description="Pipecat Exotel Voice Chatbot Server")
189
- parser.add_argument(
190
- "-t", "--test", action="store_true", default=False, help="set the server in testing mode"
191
- )
192
- parser.add_argument(
193
- "-p", "--port", type=int, default=8005, help="port to run the server on"
194
- )
195
- args, _ = parser.parse_known_args()
196
-
197
- app.state.testing = args.test
198
-
199
- print(f"[*] Pipecat Exotel Voice Chatbot running on port {args.port}")
200
- print(f"[*] Health check: http://localhost:{args.port}/health")
201
- print(f"[*] WebSocket endpoint: ws://localhost:{args.port}/media")
202
- print(f"[*] Testing mode: {'enabled' if args.test else 'disabled'}")
203
- print("")
204
- print("Required environment variables:")
205
- print(" OPENAI_API_KEY - Your OpenAI API key")
206
- print(" DEEPGRAM_API_KEY - Your Deepgram API key")
207
- print(" ELEVEN_API_KEY - Your ElevenLabs API key")
208
- print(" ELEVEN_VOICE_ID - Your ElevenLabs voice ID")
209
- print("")
210
-
211
- uvicorn.run(
212
- app,
213
- host="0.0.0.0",
214
- port=args.port
215
- )
216
-
217
-
218
- if __name__ == "__main__":
219
- main()
 
1
  #!/usr/bin/env python3
2
  #
3
+ # Hugging Face Compatible Version
4
+ # This version works with Hugging Face Spaces limitations
 
5
  #
6
 
 
 
 
 
7
  import os
8
+ import json
9
+ from fastapi import FastAPI, Request
10
+ from fastapi.responses import HTMLResponse, JSONResponse
11
+ from fastapi.staticfiles import StaticFiles
12
+ from fastapi.templating import Jinja2Templates
13
  import uvicorn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Create FastAPI app
16
+ app = FastAPI(title="Pipecat Voice Chatbot - Hugging Face", version="1.0.0")
17
+
18
+ # Serve static files
19
+ app.mount("/static", StaticFiles(directory="static"), name="static")
20
+ templates = Jinja2Templates(directory="templates")
21
+
22
+ @app.get("/", response_class=HTMLResponse)
23
+ async def home(request: Request):
24
+ """Home page with WebSocket client"""
25
+ return """
26
+ <!DOCTYPE html>
27
+ <html>
28
+ <head>
29
+ <title>Pipecat Voice Chatbot</title>
30
+ <style>
31
+ body { font-family: Arial, sans-serif; margin: 40px; }
32
+ .container { max-width: 800px; margin: 0 auto; }
33
+ .status { padding: 10px; margin: 10px 0; border-radius: 5px; }
34
+ .connected { background-color: #d4edda; color: #155724; }
35
+ .disconnected { background-color: #f8d7da; color: #721c24; }
36
+ .error { background-color: #f8d7da; color: #721c24; }
37
+ button { padding: 10px 20px; margin: 5px; font-size: 16px; }
38
+ #log { background: #f8f9fa; padding: 20px; border-radius: 5px; height: 300px; overflow-y: auto; }
39
+ </style>
40
+ </head>
41
+ <body>
42
+ <div class="container">
43
+ <h1>Pipecat Voice Chatbot</h1>
44
+ <div id="status" class="status disconnected">Disconnected</div>
45
+ <button onclick="connect()">Connect</button>
46
+ <button onclick="disconnect()">Disconnect</button>
47
+ <button onclick="sendStart()">Send Start Event</button>
48
+ <div id="log"></div>
49
+ </div>
50
+
51
+ <script>
52
+ let ws = null;
53
+ const status = document.getElementById('status');
54
+ const log = document.getElementById('log');
55
+
56
+ function logMessage(message) {
57
+ const div = document.createElement('div');
58
+ div.textContent = new Date().toLocaleTimeString() + ': ' + message;
59
+ log.appendChild(div);
60
+ log.scrollTop = log.scrollHeight;
61
+ }
62
+
63
+ function connect() {
64
+ const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
65
+ const wsUrl = `${protocol}//${window.location.host}/media`;
66
+
67
+ logMessage('Connecting to: ' + wsUrl);
68
+
69
+ ws = new WebSocket(wsUrl);
70
+
71
+ ws.onopen = function() {
72
+ status.textContent = 'Connected';
73
+ status.className = 'status connected';
74
+ logMessage('Connected to WebSocket');
75
+ };
76
+
77
+ ws.onmessage = function(event) {
78
+ logMessage('Received: ' + event.data);
79
+ };
80
+
81
+ ws.onclose = function() {
82
+ status.textContent = 'Disconnected';
83
+ status.className = 'status disconnected';
84
+ logMessage('WebSocket connection closed');
85
+ };
86
+
87
+ ws.onerror = function(error) {
88
+ status.textContent = 'Error';
89
+ status.className = 'status error';
90
+ logMessage('WebSocket error: ' + error);
91
+ };
92
+ }
93
+
94
+ function disconnect() {
95
+ if (ws) {
96
+ ws.close();
97
+ }
98
+ }
99
+
100
+ function sendStart() {
101
+ if (ws && ws.readyState === WebSocket.OPEN) {
102
+ const message = {
103
+ event: 'start',
104
+ streamSid: 'test_stream_' + Date.now(),
105
+ mediaFormat: {
106
+ sample_rate: 8000,
107
+ channels: 1,
108
+ encoding: 'linear16'
109
+ }
110
+ };
111
+ ws.send(JSON.stringify(message));
112
+ logMessage('Sent start event: ' + JSON.stringify(message));
113
+ } else {
114
+ logMessage('WebSocket not connected');
115
+ }
116
+ }
117
+ </script>
118
+ </body>
119
+ </html>
120
+ """
 
 
 
 
 
121
 
122
  @app.get("/health")
123
  async def health_check():
124
  """Health check endpoint"""
125
+ return {"status": "healthy", "message": "Pipecat Voice Chatbot is running on Hugging Face"}
 
126
 
127
  @app.websocket("/media")
128
+ async def media_endpoint(websocket):
129
+ """WebSocket endpoint - may not work on Hugging Face"""
130
  await websocket.accept()
131
+ print("[*] WebSocket connection accepted")
132
 
133
  try:
134
  while True:
135
  data = await websocket.receive_text()
136
  message = json.loads(data)
137
+ print(f"[*] Received message: {json.dumps(message, indent=2)}")
138
 
139
+ # Echo back the message for testing
140
+ await websocket.send_text(json.dumps({
141
+ "status": "received",
142
+ "message": "WebSocket working!",
143
+ "original": message
144
+ }))
145
 
146
  except Exception as e:
147
  print(f"[!] Error in media endpoint: {str(e)}")
148