guifav commited on
Commit
7dfbe6a
·
1 Parent(s): d95b348
.DS_Store ADDED
Binary file (6.15 kB). View file
 
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Ai Caller
3
  emoji: 🌍
4
  colorFrom: pink
5
- colorTo: pink
6
  sdk: streamlit
7
  sdk_version: 1.41.0
8
  app_file: app.py
@@ -11,4 +11,36 @@ license: mit
11
  short_description: Call anyone using a robot
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Guilherme Favaron - Ai Caller
3
  emoji: 🌍
4
  colorFrom: pink
5
+ colorTo: yellow
6
  sdk: streamlit
7
  sdk_version: 1.41.0
8
  app_file: app.py
 
11
  short_description: Call anyone using a robot
12
  ---
13
 
14
+ # AI Dialer
15
+
16
+ An AI-powered phone dialer application that allows you to make calls using Twilio and interact with an AI assistant. The application uses FastAPI for the backend and Streamlit for the frontend interface.
17
+
18
+ ## Features
19
+
20
+ - Make phone calls through Twilio
21
+ - Real-time transcription
22
+ - AI-powered conversations
23
+ - Call recording functionality
24
+ - Historical call transcripts
25
+ - Interactive web interface
26
+
27
+ ## Environment Variables Required
28
+
29
+ ```
30
+ TWILIO_ACCOUNT_SID=your_twilio_sid
31
+ TWILIO_AUTH_TOKEN=your_twilio_token
32
+ APP_NUMBER=your_twilio_number
33
+ SERVER=your_server_url
34
+ SYSTEM_MESSAGE=your_system_message
35
+ INITIAL_MESSAGE=your_initial_message
36
+ LLM_SERVICE=openai
37
+ TTS_SERVICE=deepgram
38
+ ```
39
+
40
+ ## Deployment
41
+
42
+ This application is deployed on Hugging Face Spaces using Docker.
43
+
44
+ ## Contact the developer
45
+
46
+ mailto: falecom_guilhermefavaron@googlegroups.com
app.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import json
4
+ import os
5
+ from collections import deque
6
+ from typing import Dict
7
+
8
+ import dotenv
9
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect
10
+ from fastapi.responses import HTMLResponse
11
+ from twilio.rest import Client
12
+ from twilio.twiml.voice_response import Connect, VoiceResponse
13
+
14
+ from logger_config import get_logger
15
+ from services.call_context import CallContext
16
+ from services.llm_service import LLMFactory
17
+ from services.stream_service import StreamService
18
+ from services.transcription_service import TranscriptionService
19
+ from services.tts_service import TTSFactory
20
+
21
dotenv.load_dotenv()
app = FastAPI()
logger = get_logger("App")

# Maps Twilio call SID -> CallContext for every call this server instance
# knows about. A module-level name is already global, so the previous
# `global call_contexts` statement at module scope was a no-op and is removed.
call_contexts = {}
28
+
29
def get_server_url():
    """Return the public host used to build callback URLs for Twilio.

    Reads the SERVER environment variable (documented in the README and
    logged at startup) and falls back to localhost for local development.
    The value is a bare host, no scheme — callers prepend wss:// or https://.
    The original always returned "localhost:8000", which breaks deployed
    webhooks even though SERVER was configured.
    """
    return os.getenv("SERVER") or "localhost:8000"
32
+
33
def get_twilio_client():
    """Build an authenticated Twilio REST client from environment credentials."""
    account_sid = os.getenv("TWILIO_ACCOUNT_SID")
    auth_token = os.getenv("TWILIO_AUTH_TOKEN")
    return Client(account_sid, auth_token)
36
+
37
+ @app.post("/incoming")
38
+ async def incoming_call() -> HTMLResponse:
39
+ """Handle incoming Twilio calls"""
40
+ server = get_server_url()
41
+ response = VoiceResponse()
42
+ connect = Connect()
43
+ connect.stream(url=f"wss://{server}/connection")
44
+ response.append(connect)
45
+ return HTMLResponse(content=str(response), status_code=200)
46
+
47
+ @app.get("/call_recording/{call_sid}")
48
+ async def get_call_recording(call_sid: str):
49
+ """Get the recording URL for a specific call."""
50
+ recording = get_twilio_client().calls(call_sid).recordings.list()
51
+ if recording:
52
+ return {"recording_url": f"https://api.twilio.com/{recording[0].uri}"}
53
+ return {"error": "Recording not found"}
54
+
55
+ @app.websocket("/connection")
56
+ async def websocket_endpoint(websocket: WebSocket):
57
+ """Handle WebSocket connection for media streaming"""
58
+ await websocket.accept()
59
+
60
+ llm_service_name = os.getenv("LLM_SERVICE", "openai")
61
+ tts_service_name = os.getenv("TTS_SERVICE", "deepgram")
62
+
63
+ logger.info(f"Using LLM service: {llm_service_name}")
64
+ logger.info(f"Using TTS service: {tts_service_name}")
65
+
66
+ llm_service = LLMFactory.get_llm_service(llm_service_name, CallContext())
67
+ stream_service = StreamService(websocket)
68
+ transcription_service = TranscriptionService()
69
+ tts_service = TTSFactory.get_tts_service(tts_service_name)
70
+
71
+ marks = deque()
72
+ interaction_count = 0
73
+
74
+ await transcription_service.connect()
75
+
76
+ async def process_media(msg):
77
+ await transcription_service.send(base64.b64decode(msg['media']['payload']))
78
+
79
+ async def handle_transcription(text):
80
+ nonlocal interaction_count
81
+ if not text:
82
+ return
83
+ logger.info(f"Interaction {interaction_count} – STT -> LLM: {text}")
84
+ await llm_service.completion(text, interaction_count)
85
+ interaction_count += 1
86
+
87
+ async def handle_llm_reply(llm_reply, icount):
88
+ logger.info(f"Interaction {icount}: LLM -> TTS: {llm_reply['partialResponse']}")
89
+ await tts_service.generate(llm_reply, icount)
90
+
91
+ async def handle_speech(response_index, audio, label, icount):
92
+ logger.info(f"Interaction {icount}: TTS -> TWILIO: {label}")
93
+ await stream_service.buffer(response_index, audio)
94
+
95
+ async def handle_audio_sent(mark_label):
96
+ marks.append(mark_label)
97
+
98
+ async def handle_utterance(text, stream_sid):
99
+ try:
100
+ if len(marks) > 0 and text.strip():
101
+ logger.info("Interruption detected, clearing system.")
102
+ await websocket.send_json({
103
+ "streamSid": stream_sid,
104
+ "event": "clear"
105
+ })
106
+
107
+ # Reset states
108
+ stream_service.reset()
109
+ llm_service.reset()
110
+ except Exception as e:
111
+ logger.error(f"Error while handling utterance: {e}")
112
+ raise
113
+
114
+ transcription_service.on('utterance', handle_utterance)
115
+ transcription_service.on('transcription', handle_transcription)
116
+ llm_service.on('llmreply', handle_llm_reply)
117
+ tts_service.on('speech', handle_speech)
118
+ stream_service.on('audiosent', handle_audio_sent)
119
+
120
+ # Queue for incoming WebSocket messages
121
+ message_queue = asyncio.Queue()
122
+
123
+ async def websocket_listener():
124
+ try:
125
+ while True:
126
+ data = await websocket.receive_text()
127
+ await message_queue.put(json.loads(data))
128
+ except WebSocketDisconnect:
129
+ logger.info("WebSocket disconnected")
130
+
131
+ async def message_processor():
132
+ while True:
133
+ msg = await message_queue.get()
134
+ if msg['event'] == 'start':
135
+ stream_sid = msg['start']['streamSid']
136
+ call_sid = msg['start']['callSid']
137
+
138
+ call_context = CallContext()
139
+
140
+ if os.getenv("RECORD_CALLS") == "true":
141
+ get_twilio_client().calls(call_sid).recordings.create({"recordingChannels": "dual"})
142
+
143
+ # Decide if the call was initiated from the UI or is an inbound
144
+ if call_sid not in call_contexts:
145
+ # Inbound call
146
+ call_context.system_message = os.environ.get("SYSTEM_MESSAGE")
147
+ call_context.initial_message = os.environ.get("INITIAL_MESSAGE")
148
+ call_context.call_sid = call_sid
149
+ call_contexts[call_sid] = call_context
150
+ else:
151
+ # Call from UI, reuse the existing context
152
+ call_context = call_contexts[call_sid]
153
+
154
+ llm_service.set_call_context(call_context)
155
+ stream_service.set_stream_sid(stream_sid)
156
+ transcription_service.set_stream_sid(stream_sid)
157
+
158
+ logger.info(f"Twilio -> Starting Media Stream for {stream_sid}")
159
+ await tts_service.generate({
160
+ "partialResponseIndex": None,
161
+ "partialResponse": call_context.initial_message
162
+ }, 1)
163
+ elif msg['event'] == 'media':
164
+ asyncio.create_task(process_media(msg))
165
+ elif msg['event'] == 'mark':
166
+ label = msg['mark']['name']
167
+ if label in marks:
168
+ marks.remove(label)
169
+ elif msg['event'] == 'stop':
170
+ logger.info("Media stream ended.")
171
+ break
172
+ message_queue.task_done()
173
+
174
+ try:
175
+ listener_task = asyncio.create_task(websocket_listener())
176
+ processor_task = asyncio.create_task(message_processor())
177
+ await asyncio.gather(listener_task, processor_task)
178
+ except asyncio.CancelledError:
179
+ logger.info("Tasks cancelled")
180
+ finally:
181
+ await transcription_service.disconnect()
182
+
183
+ @app.post("/start_call")
184
+ async def start_call(request: Dict[str, str]):
185
+ """Initiate a call using Twilio with optional system and initial messages."""
186
+ to_number = request.get("to_number")
187
+ system_message = request.get("system_message")
188
+ initial_message = request.get("initial_message")
189
+ logger.info(f"Initiating call to {to_number}")
190
+
191
+ server = get_server_url()
192
+ service_url = f"https://{server}/incoming"
193
+
194
+ if not to_number:
195
+ return {"error": "Missing 'to_number' in request"}
196
+
197
+ try:
198
+ client = get_twilio_client()
199
+ logger.info(f"Initiating call to {to_number} via {service_url}")
200
+ call = client.calls.create(
201
+ to=to_number,
202
+ from_=os.getenv("APP_NUMBER"),
203
+ url=service_url
204
+ )
205
+ call_sid = call.sid
206
+
207
+ call_context = CallContext()
208
+ call_contexts[call_sid] = call_context
209
+
210
+ # Set custom system and initial messages for this call if provided
211
+ call_context.system_message = system_message or os.getenv("SYSTEM_MESSAGE")
212
+ call_context.initial_message = initial_message or os.getenv("INITIAL_MESSAGE")
213
+ call_context.call_sid = call_sid
214
+
215
+ return {"call_sid": call_sid}
216
+ except Exception as e:
217
+ logger.error(f"Error initiating call: {str(e)}")
218
+ return {"error": f"Failed to initiate call: {str(e)}"}
219
+
220
+ @app.get("/call_status/{call_sid}")
221
+ async def get_call_status(call_sid: str):
222
+ """Get the status of a call."""
223
+ try:
224
+ client = get_twilio_client()
225
+ call = client.calls(call_sid).fetch()
226
+ return {"status": call.status}
227
+ except Exception as e:
228
+ logger.error(f"Error fetching call status: {str(e)}")
229
+ return {"error": f"Failed to fetch call status: {str(e)}"}
230
+
231
+ @app.post("/end_call")
232
+ async def end_call(request: Dict[str, str]):
233
+ """End a specific call."""
234
+ try:
235
+ call_sid = request.get("call_sid")
236
+ client = get_twilio_client()
237
+ client.calls(call_sid).update(status='completed')
238
+ return {"status": "success"}
239
+ except Exception as e:
240
+ logger.error(f"Error ending call {str(e)}")
241
+ return {"error": f"Failed to end requested call: {str(e)}"}
242
+
243
+ @app.get("/transcript/{call_sid}")
244
+ async def get_transcript(call_sid: str):
245
+ """Get the entire transcript for a specific call."""
246
+ call_context = call_contexts.get(call_sid)
247
+ if not call_context:
248
+ logger.info(f"[GET] Call not found for call SID: {call_sid}")
249
+ return {"error": "Call not found"}
250
+ return {"transcript": call_context.user_context}
251
+
252
+ @app.get("/all_transcripts")
253
+ async def get_all_transcripts():
254
+ """Get a list of all current call transcripts."""
255
+ try:
256
+ transcript_list = []
257
+ for call_sid, context in call_contexts.items():
258
+ transcript_list.append({
259
+ "call_sid": call_sid,
260
+ "transcript": context.user_context,
261
+ })
262
+ return {"transcripts": transcript_list}
263
+ except Exception as e:
264
+ logger.error(f"Error fetching all transcripts: {str(e)}")
265
+ return {"error": f"Failed to fetch all transcripts: {str(e)}"}
266
+
267
+ if __name__ == "__main__":
268
+ import uvicorn
269
+ logger.info("Starting server...")
270
+ logger.info(f"Backend server address set to: {os.getenv('SERVER')}")
271
+ port = int(os.getenv("PORT", 8000))
272
+ uvicorn.run(app, host="0.0.0.0", port=port)
functions/__pycache__/end_call.cpython-313.pyc ADDED
Binary file (1.26 kB). View file
 
functions/__pycache__/function_manifest.cpython-313.pyc ADDED
Binary file (685 Bytes). View file
 
functions/__pycache__/transfer_call.cpython-313.pyc ADDED
Binary file (1.4 kB). View file
 
functions/end_call.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from twilio.rest import Client
3
+ import asyncio
4
+
5
async def end_call(context, args):
    """End the active Twilio call identified by context.call_sid.

    Returns a human-readable status string for the LLM to relay.
    """
    client = Client(os.environ['TWILIO_ACCOUNT_SID'], os.environ['TWILIO_AUTH_TOKEN'])
    sid = context.call_sid

    # Fetch the current state so an already-finished call is not re-ended.
    current = client.calls(sid).fetch()
    terminal_states = ['completed', 'failed', 'busy', 'no-answer', 'canceled']
    if current.status in terminal_states:
        return f"Call already ended with status: {current.status}"

    # Give the spoken goodbye a moment to finish playing before hanging up.
    await asyncio.sleep(5)

    updated = client.calls(sid).update(status='completed')
    return f"Call ended successfully. Final status: {updated.status}"
functions/function_manifest.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# OpenAI-style tool manifest exposed to the LLM. Each entry pairs a callable
# name (resolved via importlib against functions/<name>.py by llm_service)
# with a canned "say" phrase the assistant speaks while the tool executes.
tools = [
    {
        "type": "function",
        "function": {
            "name": "transfer_call",
            "description": "Transfer call to a human, only do this if the user insists on it.",
            "parameters": {
                "type": "object",
                "properties": {}
            },
            # Spoken to the caller while the transfer is in progress.
            "say": "Transferring your call, please wait."
        }
    },

    {
        "type": "function",
        "function": {
            "name": "end_call",
            "description": "End the current call but always ask for confirmation unless its a natural place in the conversation (and your intent is fullfilled) to end the call.",
            "parameters": {
                "type": "object",
                "properties": {}
            },
            # Spoken to the caller just before the call is terminated.
            "say": "Goodbye."
        }
    }
]
functions/transfer_call.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from twilio.rest import Client
3
+ import asyncio
4
+
5
async def transfer_call(context, args):
    """Redirect the live call to a human at TRANSFER_NUMBER via a Twimlet.

    Returns a short status string for the LLM to relay.
    """
    account_sid = os.environ['TWILIO_ACCOUNT_SID']
    auth_token = os.environ['TWILIO_AUTH_TOKEN']
    transfer_number = os.environ['TRANSFER_NUMBER']

    client = Client(account_sid, auth_token)
    sid = context.call_sid

    # Pause so the "transferring" announcement finishes before the redirect.
    await asyncio.sleep(8)

    try:
        # Confirm the call still exists, then point it at the forwarding Twimlet.
        call = client.calls(sid).fetch()
        call = client.calls(sid).update(
            url=f'http://twimlets.com/forward?PhoneNumber={transfer_number}',
            method='POST'
        )
        return f"Call transferred."
    except Exception as e:
        return f"Error transferring call: {str(e)}"
logger_config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys

from loguru import logger

# Remove the default handler
logger.remove()

# Add a single INFO-level stderr sink.
# NOTE: loguru's built-in {name} field is the *module* name; the component
# label passed to get_logger() is stored via bind() in the record's "extra"
# dict and must be rendered as {extra[name]} — with plain {name} the bound
# label was silently ignored.
logger.add(
    sys.stderr,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{extra[name]}</cyan> - <level>{message}</level>",
    level="INFO",
    colorize=True
)

def get_logger(name):
    """Return a logger bound to *name* so log lines identify their component."""
    return logger.bind(name=name)
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.68.0
2
+ uvicorn==0.15.0
3
+ python-dotenv==0.19.0
4
+ streamlit==1.22.0
5
+ twilio==7.17.0
6
+ requests==2.28.2
7
+ python-multipart==0.0.5
8
+ websockets>=11.0
9
+ typing-extensions==4.5.0
10
+ python-jose==3.3.0
11
+ passlib==1.7.4
12
+ bcrypt==4.0.1
13
+ pydantic==1.10.7
14
+ aiohttp==3.8.4
15
+ boto3==1.26.137
16
+ openai==0.27.7
17
+ deepgram-sdk==2.11.0
18
+ elevenlabs==0.2.24
19
+ anthropic==0.3.6
20
+ # (duplicate unpinned 'requests' entry removed — already pinned above as requests==2.28.2)
services/__pycache__/call_context.cpython-313.pyc ADDED
Binary file (975 Bytes). View file
 
services/__pycache__/event_emmiter.cpython-313.pyc ADDED
Binary file (2.84 kB). View file
 
services/__pycache__/llm_service.cpython-313.pyc ADDED
Binary file (15.1 kB). View file
 
services/__pycache__/stream_service.cpython-313.pyc ADDED
Binary file (3.27 kB). View file
 
services/__pycache__/transcription_service.cpython-313.pyc ADDED
Binary file (7.42 kB). View file
 
services/__pycache__/tts_service.cpython-313.pyc ADDED
Binary file (8.28 kB). View file
 
services/call_context.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+
3
+
4
class CallContext:
    """Store context for the current call."""
    def __init__(self):
        self.stream_sid: Optional[str] = None    # Twilio media-stream SID
        self.call_sid: Optional[str] = None      # Twilio call SID
        self.call_ended: bool = False            # set once the call terminates
        self.user_context: List = []             # conversation history (chat-style messages)
        self.system_message: str = ""            # LLM system prompt for this call
        self.initial_message: str = ""           # first utterance spoken to the callee
        self.start_time: Optional[str] = None    # call start timestamp, if recorded — TODO confirm setter
        self.end_time: Optional[str] = None      # call end timestamp, if recorded — TODO confirm setter
        self.final_status: Optional[str] = None  # final Twilio call status, if recorded
16
+
services/event_emmiter.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from typing import Any, Callable, Dict, List
3
+
4
+
5
class EventEmitter:
    """Minimal async-aware publish/subscribe helper.

    Callbacks are registered per event name with on() and invoked — in
    registration order — when that event is emitted. Both plain functions
    and coroutine functions are supported; events with no listeners are
    silently ignored.
    """

    def __init__(self):
        """Create an emitter with no registered listeners."""
        # event name -> ordered list of registered callbacks
        self._events: Dict[str, List[Callable]] = {}

    def on(self, event: str, callback: Callable):
        """Register *callback* to run whenever *event* is emitted.

        Args:
            event (str): The name of the event.
            callback (Callable): Function or coroutine function to invoke.
        """
        self._events.setdefault(event, []).append(callback)

    async def emit(self, event: str, *args: Any, **kwargs: Any):
        """Invoke every callback registered for *event* with the given args.

        Args:
            event (str): The name of the event.
            *args (Any): Positional arguments forwarded to each callback.
            **kwargs (Any): Keyword arguments forwarded to each callback.
        """
        for callback in self._events.get(event, []):
            await self._run_callback(callback, *args, **kwargs)

    async def _run_callback(self, callback: Callable, *args: Any, **kwargs: Any):
        """Await coroutine callbacks; call synchronous ones directly."""
        if asyncio.iscoroutinefunction(callback):
            await callback(*args, **kwargs)
        else:
            callback(*args, **kwargs)
services/llm_service.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ import json
3
+ import os
4
+ import re
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any, Dict, List
7
+
8
+ import anthropic
9
+ from openai import AsyncOpenAI
10
+
11
+ from functions.function_manifest import tools
12
+ from logger_config import get_logger
13
+ from services.call_context import CallContext
14
+ from services.event_emmiter import EventEmitter
15
+
16
+ logger = get_logger("LLMService")
17
+
18
class AbstractLLMService(EventEmitter, ABC):
    """Shared base for LLM backends.

    Maintains the chat history for a call, resolves the tool manifest to
    callables, and emits 'llmreply' events containing sentence-sized chunks
    of the model's streamed output.
    """

    def __init__(self, context: CallContext):
        super().__init__()
        self.system_message = context.system_message
        self.initial_message = context.initial_message
        self.context = context
        # Seed the history with a synthetic exchange so the first real turn
        # sees the greeting the callee already heard.
        self.user_context = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": self.initial_message}
        ]
        self.partial_response_index = 0
        # Resolve each manifest entry to the function of the same name in
        # functions/<name>.py.
        self.available_functions = {}
        for tool in tools:
            function_name = tool['function']['name']
            module = importlib.import_module(f'functions.{function_name}')
            self.available_functions[function_name] = getattr(module, function_name)
        # Holds text not yet terminated by sentence punctuation.
        self.sentence_buffer = ""
        # Share the history with the context so transcript endpoints see it.
        context.user_context = self.user_context

    def set_call_context(self, context: CallContext):
        """Rebind this service to a new call's context, resetting the history."""
        self.context = context
        self.user_context = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": context.initial_message}
        ]
        context.user_context = self.user_context
        self.system_message = context.system_message
        self.initial_message = context.initial_message


    @abstractmethod
    async def completion(self, text: str, interaction_count: int, role: str = 'user', name: str = 'user'):
        """Stream a model reply for *text*, emitting 'llmreply' events."""
        pass

    def reset(self):
        # Called on interruption: restart chunk ordering from zero.
        self.partial_response_index = 0

    def validate_function_args(self, args):
        """Parse the model-supplied JSON argument string; {} on bad JSON."""
        try:
            return json.loads(args)
        except json.JSONDecodeError:
            logger.info('Warning: Invalid function arguments returned by LLM:', args)
            return {}

    @staticmethod
    def convert_openai_tools_to_anthropic(openai_tools):
        """Translate OpenAI-style tool specs to Anthropic's input_schema form."""
        anthropic_tools = []
        for tool in openai_tools:
            if tool['type'] == 'function':
                function = tool['function']
                anthropic_tool = {
                    "name": function['name'],
                    "description": function.get('description', ''),
                    "input_schema": {
                        "type": "object",
                        "properties": function.get('parameters', {}).get('properties', {}),
                        "required": function.get('parameters', {}).get('required', [])
                    }
                }

                # Remove 'description' from individual properties if present
                for prop in anthropic_tool['input_schema']['properties'].values():
                    prop.pop('description', None)

                # If there are no properties, set an empty dict
                if not anthropic_tool['input_schema']['properties']:
                    anthropic_tool['input_schema']['properties'] = {}

                anthropic_tools.append(anthropic_tool)

        return anthropic_tools

    def split_into_sentences(self, text):
        """Split *text* on sentence punctuation, keeping the punctuation."""
        # Split the text into sentences, keeping the separators
        sentences = re.split(r'([.!?])', text)
        # Pair the sentences with their separators
        sentences = [''.join(sentences[i:i+2]) for i in range(0, len(sentences), 2)]
        return sentences

    async def emit_complete_sentences(self, text, interaction_count):
        """Buffer streamed *text* and emit each completed sentence as 'llmreply'."""
        self.sentence_buffer += text
        sentences = self.split_into_sentences(self.sentence_buffer)

        # Emit all complete sentences
        for sentence in sentences[:-1]:
            await self.emit('llmreply', {
                "partialResponseIndex": self.partial_response_index,
                "partialResponse": sentence.strip()
            }, interaction_count)
            self.partial_response_index += 1

        # Keep the last (potentially incomplete) sentence in the buffer
        self.sentence_buffer = sentences[-1] if sentences else ""
111
+
112
class OpenAIService(AbstractLLMService):
    """LLM backend using OpenAI chat completions with streamed tool calls."""

    def __init__(self, context: CallContext):
        super().__init__(context)
        self.openai = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    async def completion(self, text: str, interaction_count: int, role: str = 'user', name: str = 'user'):
        """Stream a GPT-4o reply for *text*, emitting sentence-sized chunks.

        Tool calls requested by the model are executed via
        self.available_functions; the tool result is fed back through a
        recursive completion() pass (except end_call, which must not
        trigger further conversation).
        """
        try:
            self.user_context.append({"role": role, "content": text, "name": name})
            messages = [{"role": "system", "content": self.system_message}] + self.user_context

            stream = await self.openai.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=tools,
                stream=True,
            )

            complete_response = ""
            function_name = ""
            function_args = ""

            async for chunk in stream:
                delta = chunk.choices[0].delta
                content = delta.content or ""
                tool_calls = delta.tool_calls

                if tool_calls:
                    # Tool-call arguments arrive incrementally; accumulate them.
                    for tool_call in tool_calls:
                        if tool_call.function and tool_call.function.name:
                            logger.info(f"Function call detected: {tool_call.function.name}")
                            function_name = tool_call.function.name
                            function_args += tool_call.function.arguments or ""
                else:
                    complete_response += content
                    await self.emit_complete_sentences(content, interaction_count)

                if chunk.choices[0].finish_reason == "tool_calls":
                    logger.info(f"Function call detected: {function_name}")
                    function_to_call = self.available_functions[function_name]
                    function_args = self.validate_function_args(function_args)

                    # Speak the tool's canned "say" phrase while it executes.
                    tool_data = next((tool for tool in tools if tool['function']['name'] == function_name), None)
                    say = tool_data['function']['say']

                    await self.emit('llmreply', {
                        "partialResponseIndex": None,
                        "partialResponse": say
                    }, interaction_count)

                    self.user_context.append({"role": "assistant", "content": say})

                    function_response = await function_to_call(self.context, function_args)

                    logger.info(f"Function {function_name} called with args: {function_args}")

                    if function_name != "end_call":
                        await self.completion(function_response, interaction_count, 'function', function_name)

            # Emit any remaining content in the buffer
            if self.sentence_buffer.strip():
                await self.emit('llmreply', {
                    "partialResponseIndex": self.partial_response_index,
                    "partialResponse": self.sentence_buffer.strip()
                }, interaction_count)
                self.sentence_buffer = ""

            self.user_context.append({"role": "assistant", "content": complete_response})

        except Exception as e:
            logger.error(f"Error in OpenAIService completion: {str(e)}")
182
+
183
+
184
class AnthropicService(AbstractLLMService):
    """LLM backend using Anthropic's Claude with streamed responses."""

    def __init__(self, context: CallContext):
        super().__init__(context)
        self.client = anthropic.AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
        # Add a dummy user message to ensure the first message is from the user
        self.user_context = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": self.initial_message}
        ]

    async def completion(self, text: str, interaction_count: int, role: str = 'user', name: str = 'user'):
        """Stream a Claude reply for *text*, emitting sentence-sized chunks.

        Tool calls are executed via self.available_functions. FIX: the tool
        is now invoked as function_to_call(self.context, function_args) —
        the functions' signatures are (context, args) and the original call
        omitted the context argument, which would raise TypeError (compare
        OpenAIService.completion).
        """
        try:
            self.user_context.append({"role": role, "content": text})

            messages = [{"role": msg["role"], "content": msg["content"]} for msg in self.user_context]

            async with self.client.messages.stream(
                model="claude-3-opus-20240229",
                max_tokens=300,
                system=self.system_message,
                messages=messages,
                tools=self.convert_openai_tools_to_anthropic(tools),
            ) as stream:
                complete_response = ""
                async for event in stream:
                    if event.type == "text":
                        content = event.text
                        complete_response += content
                        await self.emit_complete_sentences(content, interaction_count)
                    elif event.type == "tool_call":
                        function_name = event.tool_call.function.name
                        function_args = event.tool_call.function.arguments
                        logger.info(f"Function call detected: {function_name}")
                        function_to_call = self.available_functions[function_name]
                        function_args = self.validate_function_args(function_args)

                        # Speak the tool's canned "say" phrase while it executes.
                        tool_data = next((tool for tool in tools if tool['function']['name'] == function_name), None)
                        say = tool_data['function']['say']

                        await self.emit('llmreply', {
                            "partialResponseIndex": None,
                            "partialResponse": say
                        }, interaction_count)

                        # Pass the call context first — tools expect (context, args).
                        function_response = await function_to_call(self.context, function_args)

                        logger.info(f"Function {function_name} called with args: {function_args}")

                        if function_name != "end_call":
                            await self.completion(function_response, interaction_count, 'function', function_name)

                # Emit any remaining content in the buffer
                if self.sentence_buffer.strip():
                    await self.emit('llmreply', {
                        "partialResponseIndex": self.partial_response_index,
                        "partialResponse": self.sentence_buffer.strip()
                    }, interaction_count)
                    self.sentence_buffer = ""

                final_message = await stream.get_final_message()
                self.user_context.append({"role": "assistant", "content": final_message.content[0].text})

        except Exception as e:
            logger.error(f"Error in AnthropicService completion: {str(e)}")
248
+
249
class LLMFactory:
    """Create the concrete LLM service selected by configuration."""

    @staticmethod
    def get_llm_service(service_name: str, context: CallContext) -> AbstractLLMService:
        """Return an LLM service for *service_name* ('openai' or 'anthropic').

        Raises:
            ValueError: if the name matches no known backend.
        """
        backends = {
            "openai": OpenAIService,
            "anthropic": AnthropicService,
        }
        key = service_name.lower()
        if key not in backends:
            raise ValueError(f"Unsupported LLM service: {service_name}")
        return backends[key](context)
services/stream_service.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from typing import Dict
3
+
4
+ from fastapi import WebSocket
5
+
6
+ from logger_config import get_logger
7
+ from services.event_emmiter import EventEmitter
8
+
9
+ logger = get_logger("Stream")
10
+
11
class StreamService(EventEmitter):
    """Delivers TTS audio chunks to Twilio over the WebSocket, in order.

    Chunks may arrive out of order from the TTS service; they are held in
    a buffer keyed by index and flushed as soon as the next expected index
    is available. After each chunk a 'mark' message is sent so Twilio can
    confirm playback, and an 'audiosent' event is emitted with its label.
    """

    def __init__(self, websocket: WebSocket):
        super().__init__()
        self.ws = websocket
        self.expected_audio_index = 0
        # index -> base64 audio payload waiting for its turn
        self.audio_buffer: Dict[int, str] = {}
        self.stream_sid = ''

    def set_stream_sid(self, stream_sid: str):
        """Record the Twilio stream SID used to address outgoing messages."""
        self.stream_sid = stream_sid

    async def buffer(self, index: int, audio: str):
        """Queue *audio* chunk *index* for ordered delivery.

        A None index (e.g. canned tool phrases) bypasses ordering entirely.
        """
        if index is None:
            await self.send_audio(audio)
            return
        if index != self.expected_audio_index:
            # Out of order: park it until its predecessors arrive.
            self.audio_buffer[index] = audio
            return
        await self.send_audio(audio)
        self.expected_audio_index += 1
        # Flush any consecutive chunks that were waiting on this one.
        while self.expected_audio_index in self.audio_buffer:
            pending = self.audio_buffer.pop(self.expected_audio_index)
            await self.send_audio(pending)
            self.expected_audio_index += 1

    def reset(self):
        """Drop buffered audio and restart ordering (used on interruption)."""
        self.expected_audio_index = 0
        self.audio_buffer = {}

    async def send_audio(self, audio: str):
        """Send one media frame followed by a uniquely-labelled mark message."""
        await self.ws.send_json({
            "streamSid": self.stream_sid,
            "event": "media",
            "media": {
                "payload": audio
            }
        })

        mark_label = str(uuid.uuid4())

        await self.ws.send_json({
            "streamSid": self.stream_sid,
            "event": "mark",
            "mark": {
                "name": mark_label
            }
        })

        await self.emit('audiosent', mark_label)
services/transcription_service.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from deepgram import DeepgramClient, LiveOptions, LiveTranscriptionEvents
4
+
5
+ from logger_config import get_logger
6
+ from services.event_emmiter import EventEmitter
7
+
8
+ logger = get_logger("Transcription")
9
+
10
class TranscriptionService(EventEmitter):
    """Streams Twilio mulaw audio to Deepgram live transcription.

    Events emitted:
        'transcription' — a finalized utterance (str).
        'utterance'     — interim (non-final) text plus the stream SID.
    """

    def __init__(self):
        super().__init__()
        self.client = DeepgramClient(os.getenv("DEEPGRAM_API_KEY"))
        self.deepgram_live = None
        # Accumulates is_final (but not yet speech_final) fragments.
        self.final_result = ""
        self.speech_final = False
        self.stream_sid = None
        # BUG FIX: is_connected was read/assigned by the error/close
        # handlers and disconnect() but never initialized here.
        self.is_connected = False

    def set_stream_sid(self, stream_id):
        self.stream_sid = stream_id

    def get_stream_sid(self):
        return self.stream_sid

    async def connect(self):
        """Open the Deepgram live socket and register event handlers."""
        self.deepgram_live = self.client.listen.asynclive.v("1")
        await self.deepgram_live.start(LiveOptions(
            model="nova-2",
            language="en-US",
            encoding="mulaw",      # Twilio media streams are 8kHz mulaw
            sample_rate=8000,
            channels=1,
            punctuate=True,
            interim_results=True,
            endpointing=200,
            utterance_end_ms=1000
        ))
        self.is_connected = True

        self.deepgram_live.on(LiveTranscriptionEvents.Transcript, self.handle_transcription)
        self.deepgram_live.on(LiveTranscriptionEvents.Error, self.handle_error)
        self.deepgram_live.on(LiveTranscriptionEvents.Close, self.handle_close)
        self.deepgram_live.on(LiveTranscriptionEvents.Warning, self.handle_warning)
        self.deepgram_live.on(LiveTranscriptionEvents.Metadata, self.handle_metadata)
        self.deepgram_live.on(LiveTranscriptionEvents.UtteranceEnd, self.handle_utterance_end)

    async def handle_utterance_end(self, self_obj, utterance_end):
        """Flush accumulated text when the utterance ends before a
        speech_final transcript arrived."""
        try:
            if self.speech_final:
                return
            logger.info(f"UtteranceEnd received before speech was final, emit the text collected so far: {self.final_result}")
            await self.emit('transcription', self.final_result)
            self.final_result = ''
            self.speech_final = True
        except Exception as e:
            # BUG FIX: Exception has no print_stack(); log the traceback
            # via exc_info instead.
            logger.error(f"Error while handling utterance end: {e}", exc_info=True)

    async def handle_transcription(self, self_obj, result):
        """Route Deepgram transcript events.

        is_final fragments accumulate into final_result; once speech_final
        is seen the whole utterance is emitted as 'transcription'. Interim
        text is forwarded as 'utterance' events with the stream SID.
        """
        try:
            alternatives = result.channel.alternatives if hasattr(result, 'channel') else []
            text = alternatives[0].transcript if alternatives else ""

            if result.is_final and text.strip():
                self.final_result += f" {text}"
                if result.speech_final:
                    self.speech_final = True
                    await self.emit('transcription', self.final_result)
                    self.final_result = ''
                else:
                    # Final fragment, but the speaker may continue.
                    self.speech_final = False
            elif text.strip():
                await self.emit('utterance', text, self.stream_sid)
        except Exception as e:
            # BUG FIX: Exception has no print_stack(); log the traceback
            # via exc_info instead.
            logger.error(f"Error while handling transcription: {e}", exc_info=True)

    async def handle_error(self, self_obj, error):
        logger.error(f"Deepgram error: {error}")
        self.is_connected = False

    async def handle_warning(self, self_obj, warning):
        # BUG FIX: positional args after the message are %-format args for
        # logging and raised a formatting error; interpolate explicitly.
        logger.info(f"Deepgram warning: {warning}")

    async def handle_metadata(self, self_obj, metadata):
        # BUG FIX: same print-style logging misuse as handle_warning.
        logger.info(f"Deepgram metadata: {metadata}")

    async def handle_close(self, self_obj, close):
        logger.info("Deepgram connection closed")
        self.is_connected = False

    async def send(self, payload: bytes):
        """Forward a raw mulaw payload to Deepgram, if connected."""
        if self.deepgram_live:
            await self.deepgram_live.send(payload)

    async def disconnect(self):
        """Close the Deepgram socket and clear connection state."""
        if self.deepgram_live:
            await self.deepgram_live.finish()
        self.deepgram_live = None
        self.is_connected = False
        logger.info("Disconnected from Deepgram")
services/tts_service.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import base64
3
+ import os
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Dict
6
+
7
+ import aiohttp
8
+ import numpy as np
9
+ from deepgram import DeepgramClient, LiveOptions
10
+ from dotenv import load_dotenv
11
+
12
+ from logger_config import get_logger
13
+ from services.event_emmiter import EventEmitter
14
+
15
+ load_dotenv()
16
+ logger = get_logger("TTS")
17
+
18
+
19
class AbstractTTSService(EventEmitter, ABC):
    """Interface every TTS backend must implement.

    Implementations synthesize each partial LLM reply and publish the
    resulting audio via 'speech' events.
    """

    @abstractmethod
    async def generate(self, llm_reply: Dict[str, Any], interaction_count: int):
        """Synthesize *llm_reply* and emit the audio as a 'speech' event."""

    @abstractmethod
    async def set_voice(self, voice_id: str):
        """Select the voice used for subsequent synthesis."""

    @abstractmethod
    async def disconnect(self):
        """Release any resources held by the backend."""
31
+
32
class ElevenLabsTTS(AbstractTTSService):
    """TTS backend using the ElevenLabs streaming HTTP API.

    Emits 'speech' events carrying base64-encoded ulaw_8000 audio suitable
    for a Twilio media stream. Voice, API key and model come from the
    ELEVENLABS_* environment variables.
    """

    def __init__(self):
        super().__init__()
        self.voice_id = os.getenv("ELEVENLABS_VOICE_ID")
        self.api_key = os.getenv("ELEVENLABS_API_KEY")
        self.model_id = os.getenv("ELEVENLABS_MODEL_ID")
        self.speech_buffer = {}

    async def set_voice(self, voice_id):
        """Select the ElevenLabs voice for subsequent requests.

        BUG FIX: was a sync method although AbstractTTSService declares it
        async (and DeepgramTTS implements it async); callers awaiting the
        interface would have failed.
        """
        self.voice_id = voice_id

    async def disconnect(self):
        # ElevenLabs client doesn't require explicit disconnection
        return

    async def generate(self, llm_reply: Dict[str, Any], interaction_count: int):
        """Convert one partial LLM reply to speech and emit a 'speech' event.

        Args:
            llm_reply: dict with 'partialResponseIndex' and 'partialResponse'.
            interaction_count: conversation turn counter, passed through
                unchanged on the emitted event.
        """
        partial_response_index = llm_reply['partialResponseIndex']
        partial_response = llm_reply['partialResponse']

        if not partial_response:
            return

        try:
            output_format = "ulaw_8000"
            url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}/stream"
            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json",
                "Accept": "audio/wav"
            }
            params = {
                "output_format": output_format,
                "optimize_streaming_latency": 4
            }
            data = {
                "model_id": self.model_id,
                "text": partial_response
            }

            async with aiohttp.ClientSession() as session:
                async with session.post(url, headers=headers, params=params, json=data) as response:
                    if response.status == 200:
                        audio_content = await response.read()
                        audio_base64 = base64.b64encode(audio_content).decode('utf-8')
                        await self.emit('speech', partial_response_index, audio_base64, partial_response, interaction_count)
                    else:
                        # BUG FIX: non-200 responses were silently dropped,
                        # leaving no trace of the failed synthesis.
                        error_body = await response.text()
                        logger.error(f"ElevenLabs TTS request failed with status {response.status}: {error_body}")
        except Exception as err:
            logger.error("Error occurred in ElevenLabs TTS service", exc_info=True)
            logger.error(str(err))
81
+
82
+
83
class DeepgramTTS(AbstractTTSService):
    """TTS backend using Deepgram's Aura speak API (mulaw @ 8kHz).

    Emits 'speech' events carrying base64-encoded mulaw audio suitable for
    a Twilio media stream.
    """

    def __init__(self):
        super().__init__()
        self.client = DeepgramClient(os.getenv("DEEPGRAM_API_KEY"))

    async def generate(self, llm_reply, interaction_count):
        """Convert one partial LLM reply to speech and emit a 'speech' event.

        Args:
            llm_reply: dict with 'partialResponseIndex' and 'partialResponse'.
            interaction_count: conversation turn counter, passed through
                unchanged on the emitted event.
        """
        partial_response_index = llm_reply['partialResponseIndex']
        partial_response = llm_reply['partialResponse']

        if not partial_response:
            return

        try:
            # FIX: removed an unused local `source` dict that shadowed the
            # inline source actually passed to the API call below.
            options = {
                "model": "aura-asteria-en",
                "encoding": "mulaw",
                "sample_rate": 8000
            }

            response = await self.client.asyncspeak.v("1").stream(
                source={"text": partial_response},
                options=options
            )

            if response.stream:
                audio_content = response.stream.getvalue()

                # Convert audio to numpy array
                audio_array = np.frombuffer(audio_content, dtype=np.uint8)

                # Trim the first 10ms (80 samples at 8000Hz) to remove the initial noise
                trim_samples = 80
                trimmed_audio = audio_array[trim_samples:]

                # Convert back to bytes
                trimmed_audio_bytes = trimmed_audio.tobytes()

                audio_base64 = base64.b64encode(trimmed_audio_bytes).decode('utf-8')
                await self.emit('speech', partial_response_index, audio_base64, partial_response, interaction_count)
            else:
                logger.error("Error in TTS generation: No audio stream returned")

        except Exception as e:
            logger.error(f"Error in TTS generation: {str(e)}")

    async def set_voice(self, voice_id):
        logger.info(f"Attempting to set voice to {voice_id}, but Deepgram TTS doesn't support direct voice selection.")
        # TODO(akiani): Implement voice selection in Deepgram TTS

    async def disconnect(self):
        # Deepgram client doesn't require explicit disconnection
        logger.info("DeepgramTTS service disconnected")
140
+
141
+
142
class TTSFactory:
    """Builds the TTS backend selected by a (case-insensitive) name."""

    @staticmethod
    def get_tts_service(service_name: str) -> "AbstractTTSService":
        """Return a new TTS service instance for *service_name*.

        Raises:
            ValueError: if the name is not a supported service.
        """
        normalized = service_name.lower()
        if normalized == "elevenlabs":
            return ElevenLabsTTS()
        if normalized == "deepgram":
            return DeepgramTTS()
        raise ValueError(f"Unsupported TTS service: {service_name}")
151
+
152
# Usage in your main application
# NOTE: this runs at import time, so merely importing this module
# instantiates the configured backend (and raises ValueError for an
# unsupported TTS_SERVICE name).
tts_service_name = os.getenv("TTS_SERVICE", "deepgram")  # Default to deepgram if not specified
tts_service = TTSFactory.get_tts_service(tts_service_name)
start.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Launch the FastAPI backend and the Streamlit frontend in one container.

# Public port for the Streamlit UI (Hugging Face Spaces expects 7860).
export PORT=${PORT:-7860}

# Start the FastAPI backend in the background on the internal port 8000.
uvicorn app:app --host 0.0.0.0 --port 8000 &

# Give the backend a moment to come up before the frontend starts polling it.
sleep 5

# Start Streamlit on $PORT in the foreground (keeps the container alive).
# FIX: quote "$PORT" so an empty/odd value cannot break argument parsing.
streamlit run streamlit_app.py --server.port "$PORT" --server.address 0.0.0.0
streamlit_app.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import requests
4
+ import streamlit as st
5
+ import dotenv
6
+
7
+ dotenv.load_dotenv(verbose=True)
8
+
9
+ st.set_page_config(page_title="Guilherme Favaron - AI Dialer", page_icon="📞", layout="wide")
10
+
11
def get_api_url():
    """Return the backend base URL for the current environment.

    On Hugging Face Spaces (SPACE_ID set) the FastAPI backend is reached
    internally on port 8000; in local development the port comes from the
    PORT environment variable (defaulting to 8000).
    """
    if os.getenv('SPACE_ID'):
        # Internal communication inside the Spaces container
        return "http://localhost:8000"

    # Local development
    local_port = os.getenv('PORT', '8000')
    return f"http://localhost:{local_port}"
19
+
20
def make_request(method, endpoint, **kwargs):
    """Send an HTTP request to the FastAPI backend and return parsed JSON.

    Args:
        method: HTTP verb, e.g. 'GET' or 'POST'.
        endpoint: Path on the backend; a leading slash is added if missing.
        **kwargs: Forwarded to requests.request (json=, timeout=, ...).
            A special 'debug' key (not a requests argument) additionally
            surfaces the attempted URL in the error message on failure.

    Returns:
        The decoded JSON body, or None when the request fails (the error
        is shown in the Streamlit UI instead of raising).
    """
    # BUG FIX: 'debug' is our own flag, not a requests.request() parameter.
    # It was previously left inside kwargs (and read with kwargs.get in the
    # error path), so passing debug=True would make requests.request raise
    # TypeError on the unexpected keyword. Pop it before forwarding.
    debug = kwargs.pop('debug', False)

    base_url = get_api_url()
    # Make sure the endpoint starts with a slash
    if not endpoint.startswith('/'):
        endpoint = f"/{endpoint}"
    url = f"{base_url}{endpoint}"

    try:
        response = requests.request(method, url, **kwargs)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        st.error(f"Error connecting to {endpoint}: {str(e)}")
        if debug:
            st.error(f"Attempted URL: {url}")
        return None
37
+
38
def init_session_state():
    """Seed Streamlit session state on the first run of a browser session.

    Subsequent reruns find 'call_active' already present and skip
    re-initialisation, preserving user state across refreshes.
    """
    if 'call_active' in st.session_state:
        return

    st.session_state.call_active = False
    st.session_state.call_sid = None
    st.session_state.transcript = []
    st.session_state.system_message = os.getenv("SYSTEM_MESSAGE")
    st.session_state.initial_message = os.getenv("INITIAL_MESSAGE")
    st.session_state.all_transcripts = fetch_all_transcripts()
    st.session_state.recording_info = None
    st.session_state.call_selector = "Current Call"
48
+
49
def handle_call_start(phone_number):
    """Initiate an outbound call through the backend and wait for connection.

    Posts the phone number plus the current system/initial messages to
    /start_call, then polls /call_status once per second for up to 60
    seconds until the call reaches 'in-progress' (success), a terminal
    status, or the timeout elapses. Progress and errors are surfaced
    through the Streamlit UI.
    """
    with st.spinner(f"Calling {phone_number}..."):
        payload = {
            "to_number": phone_number,
            "system_message": st.session_state.system_message,
            "initial_message": st.session_state.initial_message
        }
        result = make_request('POST', '/start_call', json=payload, timeout=10)

        if not result:
            # make_request already showed the connection error in the UI.
            return

        if call_sid := result.get('call_sid'):
            st.session_state.call_sid = call_sid
            st.session_state.transcript = []
            st.success(f"Call initiated. SID: {call_sid}")

            # Poll once per second, up to 60 attempts.
            for _ in range(60):
                time.sleep(1)
                status_result = make_request('GET', f'/call_status/{call_sid}')
                if not status_result:
                    # Transient request failure: keep polling.
                    continue

                status = status_result.get('status')
                if status == 'in-progress':
                    st.session_state.call_active = True
                    st.session_state.call_selector = "Current Call"
                    break
                if status in ['completed', 'failed', 'busy', 'no-answer']:
                    # Terminal Twilio statuses: the call will never connect.
                    st.error(f"Call ended: {status}")
                    break
            else:
                # for/else: reached only when no break fired in 60 polls.
                st.error("Timeout waiting for call to connect.")
        else:
            st.error(f"Failed to initiate call: {result}")
84
+
85
def handle_call_end():
    """Ask the backend to terminate the active call and reset local state."""
    payload = {"call_sid": st.session_state.call_sid}
    result = make_request('POST', '/end_call', json=payload)

    ended_ok = bool(result) and result.get('status') == 'success'
    if ended_ok:
        st.success("Call ended successfully.")
        st.session_state.call_active = False
        st.session_state.call_sid = None
        st.rerun()
92
+
93
def on_call_selector_change():
    """Refresh recording info when the call-selector widget changes."""
    selection = st.session_state.call_selector

    if selection == "Current Call":
        # The live call has no stored recording to fetch.
        st.session_state.recording_info = None
        return

    matching_transcript = None
    for candidate in st.session_state.all_transcripts:
        if f"Call {candidate['call_sid']}" == selection:
            matching_transcript = candidate
            break

    if matching_transcript:
        st.session_state.recording_info = fetch_recording_info(matching_transcript['call_sid'])
    else:
        st.warning("No transcript found for the selected call.")
105
+
106
def update_call_info():
    """Poll the backend for the current call's status and transcript.

    Mirrors the latest transcript into session state. Returns True while
    the call is still live; False once it has ended or a request failed.
    """
    sid = st.session_state.call_sid

    status_result = make_request('GET', f'/call_status/{sid}')
    if not status_result:
        return False

    status = status_result.get('status')
    if status not in ('in-progress', 'ringing'):
        # Any other Twilio status means the call is over.
        st.session_state.call_active = False
        st.warning(f"Call ended: {status}")
        return False

    transcript_result = make_request('GET', f'/transcript/{sid}')
    if not transcript_result:
        return False

    if transcript_result.get('call_ended', False):
        st.session_state.call_active = False
        st.info(f"Call ended. Status: {transcript_result.get('final_status', 'Unknown')}")
        return False

    st.session_state.transcript = transcript_result.get('transcript', [])
    return True
128
+
129
def main():
    """Render the Streamlit UI and drive the refresh loop.

    Sidebar: call controls (phone number, prompt configuration, start/end
    buttons) and a selector for historical calls. Main area: the recording
    player and the transcript of either the live call or a selected past
    call. While a call is active the page reruns once per second to pull
    fresh transcript data.
    """
    init_session_state()

    with st.sidebar:
        st.markdown(
            "<h2 style='text-align: center; font-size: 2.5em;'>📞 Guilherme Favaron - AI Dialer</h2>",
            unsafe_allow_html=True
        )
        st.divider()

        phone_number = display_call_interface()

        # Prompt configuration is locked while a call is in progress.
        st.session_state.system_message = st.text_area(
            "System Message",
            value=st.session_state.system_message,
            disabled=st.session_state.call_active
        )
        st.session_state.initial_message = st.text_area(
            "Initial Message",
            value=st.session_state.initial_message,
            disabled=st.session_state.call_active
        )

        start_call = st.button("Start Call", disabled=st.session_state.call_active)
        end_call = st.button("End Call", disabled=not st.session_state.call_active)

        if start_call and phone_number:
            handle_call_start(phone_number)
        elif start_call:
            st.warning("Please enter a valid phone number.")

        if end_call:
            handle_call_end()

        if st.session_state.call_active:
            st.success("Call in progress")
        st.divider()

        st.selectbox(
            "Select a call",
            options=["Current Call"] + [f"Call {t['call_sid']}" for t in st.session_state.all_transcripts],
            key="call_selector",
            index=0,
            disabled=st.session_state.call_active,
            on_change=on_call_selector_change
        )

        if st.button("Refresh Call List"):
            transcripts = fetch_all_transcripts()
            if transcripts is not None:
                st.session_state.all_transcripts = transcripts
            # Re-resolve recording info against the refreshed list.
            on_call_selector_change()

    st.divider()

    with st.spinner("Loading recording and transcript..."):
        # Recording player: only for historical calls with a known recording.
        if st.session_state.call_selector != "Current Call" and st.session_state.recording_info:
            st.subheader("Call Recording")
            audio_url = st.session_state.recording_info['url']
            st.audio(audio_url, format="audio/mp3", start_time=0)
            st.divider()

        if st.session_state.call_active and st.session_state.call_sid:
            # Live call: render the transcript polled into session state.
            st.subheader(f"Transcript for Current Call {st.session_state.call_sid}")
            for entry in st.session_state.transcript:
                if entry['role'] == 'user':
                    st.chat_message("user").write(entry['content'])
                elif entry['role'] == 'assistant':
                    st.chat_message("assistant").write(entry['content'])
        elif st.session_state.call_selector != "Current Call":
            # Historical call: look up its stored transcript by SID label.
            selected_transcript = next(
                (t for t in st.session_state.all_transcripts if f"Call {t['call_sid']}" == st.session_state.call_selector),
                None
            )
            if selected_transcript:
                st.subheader(f"Transcript for {st.session_state.call_selector}")
                for entry in selected_transcript['transcript']:
                    if entry['role'] == 'user':
                        st.chat_message("user").write(entry['content'])
                    elif entry['role'] == 'assistant':
                        st.chat_message("assistant").write(entry['content'])

    if st.session_state.call_active:
        if update_call_info():
            # Still live: rerun after a second to refresh the transcript.
            time.sleep(1)
            st.rerun()
        else:
            # Call ended (or polling failed): clear state and rerun once.
            st.session_state.call_active = False
            st.session_state.call_sid = None
            st.sidebar.info("Call has ended. You can start a new call if needed.")
            st.rerun()

if __name__ == "__main__":
    main()