guifav commited on
Commit
7dfbe6a
·
1 Parent(s): d95b348
.DS_Store ADDED
Binary file (6.15 kB). View file
 
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Ai Caller
3
  emoji: 🌍
4
  colorFrom: pink
5
- colorTo: pink
6
  sdk: streamlit
7
  sdk_version: 1.41.0
8
  app_file: app.py
@@ -11,4 +11,36 @@ license: mit
11
  short_description: Call anyone using a robot
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Guilherme Favaron - Ai Caller
3
  emoji: 🌍
4
  colorFrom: pink
5
+ colorTo: yellow
6
  sdk: streamlit
7
  sdk_version: 1.41.0
8
  app_file: app.py
 
11
  short_description: Call anyone using a robot
12
  ---
13
 
14
+ # AI Dialer
15
+
16
+ An AI-powered phone dialer application that allows you to make calls using Twilio and interact with an AI assistant. The application uses FastAPI for the backend and Streamlit for the frontend interface.
17
+
18
+ ## Features
19
+
20
+ - Make phone calls through Twilio
21
+ - Real-time transcription
22
+ - AI-powered conversations
23
+ - Call recording functionality
24
+ - Historical call transcripts
25
+ - Interactive web interface
26
+
27
+ ## Environment Variables Required
28
+
29
+ ```
30
+ TWILIO_ACCOUNT_SID=your_twilio_sid
31
+ TWILIO_AUTH_TOKEN=your_twilio_token
32
+ APP_NUMBER=your_twilio_number
33
+ SERVER=your_server_url
34
+ SYSTEM_MESSAGE=your_system_message
35
+ INITIAL_MESSAGE=your_initial_message
36
+ LLM_SERVICE=openai
37
+ TTS_SERVICE=deepgram
38
+ ```
39
+
40
+ ## Deployment
41
+
42
+ This application is deployed on Hugging Face Spaces using Docker.
43
+
44
+ ## Contact the developer
45
+
46
+ mailto: falecom_guilhermefavaron@googlegroups.com
app.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import json
4
+ import os
5
+ from collections import deque
6
+ from typing import Dict
7
+
8
+ import dotenv
9
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect
10
+ from fastapi.responses import HTMLResponse
11
+ from twilio.rest import Client
12
+ from twilio.twiml.voice_response import Connect, VoiceResponse
13
+
14
+ from logger_config import get_logger
15
+ from services.call_context import CallContext
16
+ from services.llm_service import LLMFactory
17
+ from services.stream_service import StreamService
18
+ from services.transcription_service import TranscriptionService
19
+ from services.tts_service import TTSFactory
20
+
21
dotenv.load_dotenv()
app = FastAPI()
logger = get_logger("App")

# Maps Twilio call SID -> CallContext for every call this server instance
# knows about. A module-level name is already global, so the previous
# `global call_contexts` statement at module scope was a no-op and is removed.
call_contexts = {}
28
+
29
def get_server_url():
    """Return the public host used to build callback URLs for Twilio.

    Reads the SERVER environment variable (documented in the README and
    logged at startup) and falls back to localhost for local development.
    The value is a bare host, no scheme — callers prepend wss:// or https://.
    The original always returned "localhost:8000", which breaks deployed
    webhooks even though SERVER was configured.
    """
    return os.getenv("SERVER") or "localhost:8000"
32
+
33
def get_twilio_client():
    """Build an authenticated Twilio REST client from environment credentials."""
    account_sid = os.getenv("TWILIO_ACCOUNT_SID")
    auth_token = os.getenv("TWILIO_AUTH_TOKEN")
    return Client(account_sid, auth_token)
36
+
37
+ @app.post("/incoming")
38
+ async def incoming_call() -> HTMLResponse:
39
+ """Handle incoming Twilio calls"""
40
+ server = get_server_url()
41
+ response = VoiceResponse()
42
+ connect = Connect()
43
+ connect.stream(url=f"wss://{server}/connection")
44
+ response.append(connect)
45
+ return HTMLResponse(content=str(response), status_code=200)
46
+
47
+ @app.get("/call_recording/{call_sid}")
48
+ async def get_call_recording(call_sid: str):
49
+ """Get the recording URL for a specific call."""
50
+ recording = get_twilio_client().calls(call_sid).recordings.list()
51
+ if recording:
52
+ return {"recording_url": f"https://api.twilio.com/{recording[0].uri}"}
53
+ return {"error": "Recording not found"}
54
+
55
+ @app.websocket("/connection")
56
+ async def websocket_endpoint(websocket: WebSocket):
57
+ """Handle WebSocket connection for media streaming"""
58
+ await websocket.accept()
59
+
60
+ llm_service_name = os.getenv("LLM_SERVICE", "openai")
61
+ tts_service_name = os.getenv("TTS_SERVICE", "deepgram")
62
+
63
+ logger.info(f"Using LLM service: {llm_service_name}")
64
+ logger.info(f"Using TTS service: {tts_service_name}")
65
+
66
+ llm_service = LLMFactory.get_llm_service(llm_service_name, CallContext())
67
+ stream_service = StreamService(websocket)
68
+ transcription_service = TranscriptionService()
69
+ tts_service = TTSFactory.get_tts_service(tts_service_name)
70
+
71
+ marks = deque()
72
+ interaction_count = 0
73
+
74
+ await transcription_service.connect()
75
+
76
+ async def process_media(msg):
77
+ await transcription_service.send(base64.b64decode(msg['media']['payload']))
78
+
79
+ async def handle_transcription(text):
80
+ nonlocal interaction_count
81
+ if not text:
82
+ return
83
+ logger.info(f"Interaction {interaction_count} – STT -> LLM: {text}")
84
+ await llm_service.completion(text, interaction_count)
85
+ interaction_count += 1
86
+
87
+ async def handle_llm_reply(llm_reply, icount):
88
+ logger.info(f"Interaction {icount}: LLM -> TTS: {llm_reply['partialResponse']}")
89
+ await tts_service.generate(llm_reply, icount)
90
+
91
+ async def handle_speech(response_index, audio, label, icount):
92
+ logger.info(f"Interaction {icount}: TTS -> TWILIO: {label}")
93
+ await stream_service.buffer(response_index, audio)
94
+
95
+ async def handle_audio_sent(mark_label):
96
+ marks.append(mark_label)
97
+
98
+ async def handle_utterance(text, stream_sid):
99
+ try:
100
+ if len(marks) > 0 and text.strip():
101
+ logger.info("Interruption detected, clearing system.")
102
+ await websocket.send_json({
103
+ "streamSid": stream_sid,
104
+ "event": "clear"
105
+ })
106
+
107
+ # Reset states
108
+ stream_service.reset()
109
+ llm_service.reset()
110
+ except Exception as e:
111
+ logger.error(f"Error while handling utterance: {e}")
112
+ raise
113
+
114
+ transcription_service.on('utterance', handle_utterance)
115
+ transcription_service.on('transcription', handle_transcription)
116
+ llm_service.on('llmreply', handle_llm_reply)
117
+ tts_service.on('speech', handle_speech)
118
+ stream_service.on('audiosent', handle_audio_sent)
119
+
120
+ # Queue for incoming WebSocket messages
121
+ message_queue = asyncio.Queue()
122
+
123
+ async def websocket_listener():
124
+ try:
125
+ while True:
126
+ data = await websocket.receive_text()
127
+ await message_queue.put(json.loads(data))
128
+ except WebSocketDisconnect:
129
+ logger.info("WebSocket disconnected")
130
+
131
+ async def message_processor():
132
+ while True:
133
+ msg = await message_queue.get()
134
+ if msg['event'] == 'start':
135
+ stream_sid = msg['start']['streamSid']
136
+ call_sid = msg['start']['callSid']
137
+
138
+ call_context = CallContext()
139
+
140
+ if os.getenv("RECORD_CALLS") == "true":
141
+ get_twilio_client().calls(call_sid).recordings.create({"recordingChannels": "dual"})
142
+
143
+ # Decide if the call was initiated from the UI or is an inbound
144
+ if call_sid not in call_contexts:
145
+ # Inbound call
146
+ call_context.system_message = os.environ.get("SYSTEM_MESSAGE")
147
+ call_context.initial_message = os.environ.get("INITIAL_MESSAGE")
148
+ call_context.call_sid = call_sid
149
+ call_contexts[call_sid] = call_context
150
+ else:
151
+ # Call from UI, reuse the existing context
152
+ call_context = call_contexts[call_sid]
153
+
154
+ llm_service.set_call_context(call_context)
155
+ stream_service.set_stream_sid(stream_sid)
156
+ transcription_service.set_stream_sid(stream_sid)
157
+
158
+ logger.info(f"Twilio -> Starting Media Stream for {stream_sid}")
159
+ await tts_service.generate({
160
+ "partialResponseIndex": None,
161
+ "partialResponse": call_context.initial_message
162
+ }, 1)
163
+ elif msg['event'] == 'media':
164
+ asyncio.create_task(process_media(msg))
165
+ elif msg['event'] == 'mark':
166
+ label = msg['mark']['name']
167
+ if label in marks:
168
+ marks.remove(label)
169
+ elif msg['event'] == 'stop':
170
+ logger.info("Media stream ended.")
171
+ break
172
+ message_queue.task_done()
173
+
174
+ try:
175
+ listener_task = asyncio.create_task(websocket_listener())
176
+ processor_task = asyncio.create_task(message_processor())
177
+ await asyncio.gather(listener_task, processor_task)
178
+ except asyncio.CancelledError:
179
+ logger.info("Tasks cancelled")
180
+ finally:
181
+ await transcription_service.disconnect()
182
+
183
+ @app.post("/start_call")
184
+ async def start_call(request: Dict[str, str]):
185
+ """Initiate a call using Twilio with optional system and initial messages."""
186
+ to_number = request.get("to_number")
187
+ system_message = request.get("system_message")
188
+ initial_message = request.get("initial_message")
189
+ logger.info(f"Initiating call to {to_number}")
190
+
191
+ server = get_server_url()
192
+ service_url = f"https://{server}/incoming"
193
+
194
+ if not to_number:
195
+ return {"error": "Missing 'to_number' in request"}
196
+
197
+ try:
198
+ client = get_twilio_client()
199
+ logger.info(f"Initiating call to {to_number} via {service_url}")
200
+ call = client.calls.create(
201
+ to=to_number,
202
+ from_=os.getenv("APP_NUMBER"),
203
+ url=service_url
204
+ )
205
+ call_sid = call.sid
206
+
207
+ call_context = CallContext()
208
+ call_contexts[call_sid] = call_context
209
+
210
+ # Set custom system and initial messages for this call if provided
211
+ call_context.system_message = system_message or os.getenv("SYSTEM_MESSAGE")
212
+ call_context.initial_message = initial_message or os.getenv("INITIAL_MESSAGE")
213
+ call_context.call_sid = call_sid
214
+
215
+ return {"call_sid": call_sid}
216
+ except Exception as e:
217
+ logger.error(f"Error initiating call: {str(e)}")
218
+ return {"error": f"Failed to initiate call: {str(e)}"}
219
+
220
+ @app.get("/call_status/{call_sid}")
221
+ async def get_call_status(call_sid: str):
222
+ """Get the status of a call."""
223
+ try:
224
+ client = get_twilio_client()
225
+ call = client.calls(call_sid).fetch()
226
+ return {"status": call.status}
227
+ except Exception as e:
228
+ logger.error(f"Error fetching call status: {str(e)}")
229
+ return {"error": f"Failed to fetch call status: {str(e)}"}
230
+
231
+ @app.post("/end_call")
232
+ async def end_call(request: Dict[str, str]):
233
+ """End a specific call."""
234
+ try:
235
+ call_sid = request.get("call_sid")
236
+ client = get_twilio_client()
237
+ client.calls(call_sid).update(status='completed')
238
+ return {"status": "success"}
239
+ except Exception as e:
240
+ logger.error(f"Error ending call {str(e)}")
241
+ return {"error": f"Failed to end requested call: {str(e)}"}
242
+
243
+ @app.get("/transcript/{call_sid}")
244
+ async def get_transcript(call_sid: str):
245
+ """Get the entire transcript for a specific call."""
246
+ call_context = call_contexts.get(call_sid)
247
+ if not call_context:
248
+ logger.info(f"[GET] Call not found for call SID: {call_sid}")
249
+ return {"error": "Call not found"}
250
+ return {"transcript": call_context.user_context}
251
+
252
+ @app.get("/all_transcripts")
253
+ async def get_all_transcripts():
254
+ """Get a list of all current call transcripts."""
255
+ try:
256
+ transcript_list = []
257
+ for call_sid, context in call_contexts.items():
258
+ transcript_list.append({
259
+ "call_sid": call_sid,
260
+ "transcript": context.user_context,
261
+ })
262
+ return {"transcripts": transcript_list}
263
+ except Exception as e:
264
+ logger.error(f"Error fetching all transcripts: {str(e)}")
265
+ return {"error": f"Failed to fetch all transcripts: {str(e)}"}
266
+
267
+ if __name__ == "__main__":
268
+ import uvicorn
269
+ logger.info("Starting server...")
270
+ logger.info(f"Backend server address set to: {os.getenv('SERVER')}")
271
+ port = int(os.getenv("PORT", 8000))
272
+ uvicorn.run(app, host="0.0.0.0", port=port)
functions/__pycache__/end_call.cpython-313.pyc ADDED
Binary file (1.26 kB). View file
 
functions/__pycache__/function_manifest.cpython-313.pyc ADDED
Binary file (685 Bytes). View file
 
functions/__pycache__/transfer_call.cpython-313.pyc ADDED
Binary file (1.4 kB). View file
 
functions/end_call.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from twilio.rest import Client
3
+ import asyncio
4
+
5
async def end_call(context, args):
    """End the active Twilio call identified by context.call_sid.

    Returns a human-readable status string for the LLM to relay.
    """
    client = Client(os.environ['TWILIO_ACCOUNT_SID'], os.environ['TWILIO_AUTH_TOKEN'])
    sid = context.call_sid

    # Fetch the current state so an already-finished call is not re-ended.
    current = client.calls(sid).fetch()
    terminal_states = ['completed', 'failed', 'busy', 'no-answer', 'canceled']
    if current.status in terminal_states:
        return f"Call already ended with status: {current.status}"

    # Give the spoken goodbye a moment to finish playing before hanging up.
    await asyncio.sleep(5)

    updated = client.calls(sid).update(status='completed')
    return f"Call ended successfully. Final status: {updated.status}"
functions/function_manifest.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# OpenAI-style tool manifest exposed to the LLM. Each entry pairs a callable
# name (resolved via importlib against functions/<name>.py by llm_service)
# with a canned "say" phrase the assistant speaks while the tool executes.
tools = [
    {
        "type": "function",
        "function": {
            "name": "transfer_call",
            "description": "Transfer call to a human, only do this if the user insists on it.",
            "parameters": {
                "type": "object",
                "properties": {}
            },
            # Spoken to the caller while the transfer is in progress.
            "say": "Transferring your call, please wait."
        }
    },

    {
        "type": "function",
        "function": {
            "name": "end_call",
            "description": "End the current call but always ask for confirmation unless its a natural place in the conversation (and your intent is fullfilled) to end the call.",
            "parameters": {
                "type": "object",
                "properties": {}
            },
            # Spoken to the caller just before the call is terminated.
            "say": "Goodbye."
        }
    }
]
functions/transfer_call.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from twilio.rest import Client
3
+ import asyncio
4
+
5
async def transfer_call(context, args):
    """Redirect the live call to a human at TRANSFER_NUMBER via a Twimlet.

    Returns a short status string for the LLM to relay.
    """
    account_sid = os.environ['TWILIO_ACCOUNT_SID']
    auth_token = os.environ['TWILIO_AUTH_TOKEN']
    transfer_number = os.environ['TRANSFER_NUMBER']

    client = Client(account_sid, auth_token)
    sid = context.call_sid

    # Pause so the "transferring" announcement finishes before the redirect.
    await asyncio.sleep(8)

    try:
        # Confirm the call still exists, then point it at the forwarding Twimlet.
        call = client.calls(sid).fetch()
        call = client.calls(sid).update(
            url=f'http://twimlets.com/forward?PhoneNumber={transfer_number}',
            method='POST'
        )
        return f"Call transferred."
    except Exception as e:
        return f"Error transferring call: {str(e)}"
logger_config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys

from loguru import logger

# Remove the default handler
logger.remove()

# Add a single INFO-level stderr sink.
# NOTE: loguru's built-in {name} field is the *module* name; the component
# label passed to get_logger() is stored via bind() in the record's "extra"
# dict and must be rendered as {extra[name]} — with plain {name} the bound
# label was silently ignored.
logger.add(
    sys.stderr,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{extra[name]}</cyan> - <level>{message}</level>",
    level="INFO",
    colorize=True
)

def get_logger(name):
    """Return a logger bound to *name* so log lines identify their component."""
    return logger.bind(name=name)
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.68.0
2
+ uvicorn==0.15.0
3
+ python-dotenv==0.19.0
4
+ streamlit==1.22.0
5
+ twilio==7.17.0
6
+ requests==2.28.2
7
+ python-multipart==0.0.5
8
+ websockets>=11.0
9
+ typing-extensions==4.5.0
10
+ python-jose==3.3.0
11
+ passlib==1.7.4
12
+ bcrypt==4.0.1
13
+ pydantic==1.10.7
14
+ aiohttp==3.8.4
15
+ boto3==1.26.137
16
+ openai==0.27.7
17
+ deepgram-sdk==2.11.0
18
+ elevenlabs==0.2.24
19
+ anthropic==0.3.6
20
+ # (duplicate unpinned 'requests' entry removed — already pinned above as requests==2.28.2)
services/__pycache__/call_context.cpython-313.pyc ADDED
Binary file (975 Bytes). View file
 
services/__pycache__/event_emmiter.cpython-313.pyc ADDED
Binary file (2.84 kB). View file
 
services/__pycache__/llm_service.cpython-313.pyc ADDED
Binary file (15.1 kB). View file
 
services/__pycache__/stream_service.cpython-313.pyc ADDED
Binary file (3.27 kB). View file
 
services/__pycache__/transcription_service.cpython-313.pyc ADDED
Binary file (7.42 kB). View file
 
services/__pycache__/tts_service.cpython-313.pyc ADDED
Binary file (8.28 kB). View file
 
services/call_context.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+
3
+
4
class CallContext:
    """Store context for the current call."""
    def __init__(self):
        self.stream_sid: Optional[str] = None    # Twilio media-stream SID
        self.call_sid: Optional[str] = None      # Twilio call SID
        self.call_ended: bool = False            # set once the call terminates
        self.user_context: List = []             # conversation history (chat-style messages)
        self.system_message: str = ""            # LLM system prompt for this call
        self.initial_message: str = ""           # first utterance spoken to the callee
        self.start_time: Optional[str] = None    # call start timestamp, if recorded — TODO confirm setter
        self.end_time: Optional[str] = None      # call end timestamp, if recorded — TODO confirm setter
        self.final_status: Optional[str] = None  # final Twilio call status, if recorded
16
+
services/event_emmiter.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from typing import Any, Callable, Dict, List
3
+
4
+
5
class EventEmitter:
    """Minimal async-aware publish/subscribe helper.

    Callbacks are registered per event name with on() and invoked — in
    registration order — when that event is emitted. Both plain functions
    and coroutine functions are supported; events with no listeners are
    silently ignored.
    """

    def __init__(self):
        """Create an emitter with no registered listeners."""
        # event name -> ordered list of registered callbacks
        self._events: Dict[str, List[Callable]] = {}

    def on(self, event: str, callback: Callable):
        """Register *callback* to run whenever *event* is emitted.

        Args:
            event (str): The name of the event.
            callback (Callable): Function or coroutine function to invoke.
        """
        self._events.setdefault(event, []).append(callback)

    async def emit(self, event: str, *args: Any, **kwargs: Any):
        """Invoke every callback registered for *event* with the given args.

        Args:
            event (str): The name of the event.
            *args (Any): Positional arguments forwarded to each callback.
            **kwargs (Any): Keyword arguments forwarded to each callback.
        """
        for callback in self._events.get(event, []):
            await self._run_callback(callback, *args, **kwargs)

    async def _run_callback(self, callback: Callable, *args: Any, **kwargs: Any):
        """Await coroutine callbacks; call synchronous ones directly."""
        if asyncio.iscoroutinefunction(callback):
            await callback(*args, **kwargs)
        else:
            callback(*args, **kwargs)
services/llm_service.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ import json
3
+ import os
4
+ import re
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any, Dict, List
7
+
8
+ import anthropic
9
+ from openai import AsyncOpenAI
10
+
11
+ from functions.function_manifest import tools
12
+ from logger_config import get_logger
13
+ from services.call_context import CallContext
14
+ from services.event_emmiter import EventEmitter
15
+
16
+ logger = get_logger("LLMService")
17
+
18
class AbstractLLMService(EventEmitter, ABC):
    """Shared base for LLM backends.

    Maintains the chat history for a call, resolves the tool manifest to
    callables, and emits 'llmreply' events containing sentence-sized chunks
    of the model's streamed output.
    """

    def __init__(self, context: CallContext):
        super().__init__()
        self.system_message = context.system_message
        self.initial_message = context.initial_message
        self.context = context
        # Seed the history with a synthetic exchange so the first real turn
        # sees the greeting the callee already heard.
        self.user_context = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": self.initial_message}
        ]
        self.partial_response_index = 0
        # Resolve each manifest entry to the function of the same name in
        # functions/<name>.py.
        self.available_functions = {}
        for tool in tools:
            function_name = tool['function']['name']
            module = importlib.import_module(f'functions.{function_name}')
            self.available_functions[function_name] = getattr(module, function_name)
        # Holds text not yet terminated by sentence punctuation.
        self.sentence_buffer = ""
        # Share the history with the context so transcript endpoints see it.
        context.user_context = self.user_context

    def set_call_context(self, context: CallContext):
        """Rebind this service to a new call's context, resetting the history."""
        self.context = context
        self.user_context = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": context.initial_message}
        ]
        context.user_context = self.user_context
        self.system_message = context.system_message
        self.initial_message = context.initial_message


    @abstractmethod
    async def completion(self, text: str, interaction_count: int, role: str = 'user', name: str = 'user'):
        """Stream a model reply for *text*, emitting 'llmreply' events."""
        pass

    def reset(self):
        # Called on interruption: restart chunk ordering from zero.
        self.partial_response_index = 0

    def validate_function_args(self, args):
        """Parse the model-supplied JSON argument string; {} on bad JSON."""
        try:
            return json.loads(args)
        except json.JSONDecodeError:
            logger.info('Warning: Invalid function arguments returned by LLM:', args)
            return {}

    @staticmethod
    def convert_openai_tools_to_anthropic(openai_tools):
        """Translate OpenAI-style tool specs to Anthropic's input_schema form."""
        anthropic_tools = []
        for tool in openai_tools:
            if tool['type'] == 'function':
                function = tool['function']
                anthropic_tool = {
                    "name": function['name'],
                    "description": function.get('description', ''),
                    "input_schema": {
                        "type": "object",
                        "properties": function.get('parameters', {}).get('properties', {}),
                        "required": function.get('parameters', {}).get('required', [])
                    }
                }

                # Remove 'description' from individual properties if present
                for prop in anthropic_tool['input_schema']['properties'].values():
                    prop.pop('description', None)

                # If there are no properties, set an empty dict
                if not anthropic_tool['input_schema']['properties']:
                    anthropic_tool['input_schema']['properties'] = {}

                anthropic_tools.append(anthropic_tool)

        return anthropic_tools

    def split_into_sentences(self, text):
        """Split *text* on sentence punctuation, keeping the punctuation."""
        # Split the text into sentences, keeping the separators
        sentences = re.split(r'([.!?])', text)
        # Pair the sentences with their separators
        sentences = [''.join(sentences[i:i+2]) for i in range(0, len(sentences), 2)]
        return sentences

    async def emit_complete_sentences(self, text, interaction_count):
        """Buffer streamed *text* and emit each completed sentence as 'llmreply'."""
        self.sentence_buffer += text
        sentences = self.split_into_sentences(self.sentence_buffer)

        # Emit all complete sentences
        for sentence in sentences[:-1]:
            await self.emit('llmreply', {
                "partialResponseIndex": self.partial_response_index,
                "partialResponse": sentence.strip()
            }, interaction_count)
            self.partial_response_index += 1

        # Keep the last (potentially incomplete) sentence in the buffer
        self.sentence_buffer = sentences[-1] if sentences else ""
111
+
112
class OpenAIService(AbstractLLMService):
    """LLM backend using OpenAI chat completions with streamed tool calls."""

    def __init__(self, context: CallContext):
        super().__init__(context)
        self.openai = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    async def completion(self, text: str, interaction_count: int, role: str = 'user', name: str = 'user'):
        """Stream a GPT-4o reply for *text*, emitting sentence-sized chunks.

        Tool calls requested by the model are executed via
        self.available_functions; the tool result is fed back through a
        recursive completion() pass (except end_call, which must not
        trigger further conversation).
        """
        try:
            self.user_context.append({"role": role, "content": text, "name": name})
            messages = [{"role": "system", "content": self.system_message}] + self.user_context

            stream = await self.openai.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=tools,
                stream=True,
            )

            complete_response = ""
            function_name = ""
            function_args = ""

            async for chunk in stream:
                delta = chunk.choices[0].delta
                content = delta.content or ""
                tool_calls = delta.tool_calls

                if tool_calls:
                    # Tool-call arguments arrive incrementally; accumulate them.
                    for tool_call in tool_calls:
                        if tool_call.function and tool_call.function.name:
                            logger.info(f"Function call detected: {tool_call.function.name}")
                            function_name = tool_call.function.name
                            function_args += tool_call.function.arguments or ""
                else:
                    complete_response += content
                    await self.emit_complete_sentences(content, interaction_count)

                if chunk.choices[0].finish_reason == "tool_calls":
                    logger.info(f"Function call detected: {function_name}")
                    function_to_call = self.available_functions[function_name]
                    function_args = self.validate_function_args(function_args)

                    # Speak the tool's canned "say" phrase while it executes.
                    tool_data = next((tool for tool in tools if tool['function']['name'] == function_name), None)
                    say = tool_data['function']['say']

                    await self.emit('llmreply', {
                        "partialResponseIndex": None,
                        "partialResponse": say
                    }, interaction_count)

                    self.user_context.append({"role": "assistant", "content": say})

                    function_response = await function_to_call(self.context, function_args)

                    logger.info(f"Function {function_name} called with args: {function_args}")

                    if function_name != "end_call":
                        await self.completion(function_response, interaction_count, 'function', function_name)

            # Emit any remaining content in the buffer
            if self.sentence_buffer.strip():
                await self.emit('llmreply', {
                    "partialResponseIndex": self.partial_response_index,
                    "partialResponse": self.sentence_buffer.strip()
                }, interaction_count)
                self.sentence_buffer = ""

            self.user_context.append({"role": "assistant", "content": complete_response})

        except Exception as e:
            logger.error(f"Error in OpenAIService completion: {str(e)}")
182
+
183
+
184
class AnthropicService(AbstractLLMService):
    """LLM backend using Anthropic's Claude with streamed responses."""

    def __init__(self, context: CallContext):
        super().__init__(context)
        self.client = anthropic.AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
        # Add a dummy user message to ensure the first message is from the user
        self.user_context = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": self.initial_message}
        ]

    async def completion(self, text: str, interaction_count: int, role: str = 'user', name: str = 'user'):
        """Stream a Claude reply for *text*, emitting sentence-sized chunks.

        Tool calls are executed via self.available_functions. FIX: the tool
        is now invoked as function_to_call(self.context, function_args) —
        the functions' signatures are (context, args) and the original call
        omitted the context argument, which would raise TypeError (compare
        OpenAIService.completion).
        """
        try:
            self.user_context.append({"role": role, "content": text})

            messages = [{"role": msg["role"], "content": msg["content"]} for msg in self.user_context]

            async with self.client.messages.stream(
                model="claude-3-opus-20240229",
                max_tokens=300,
                system=self.system_message,
                messages=messages,
                tools=self.convert_openai_tools_to_anthropic(tools),
            ) as stream:
                complete_response = ""
                async for event in stream:
                    if event.type == "text":
                        content = event.text
                        complete_response += content
                        await self.emit_complete_sentences(content, interaction_count)
                    elif event.type == "tool_call":
                        function_name = event.tool_call.function.name
                        function_args = event.tool_call.function.arguments
                        logger.info(f"Function call detected: {function_name}")
                        function_to_call = self.available_functions[function_name]
                        function_args = self.validate_function_args(function_args)

                        # Speak the tool's canned "say" phrase while it executes.
                        tool_data = next((tool for tool in tools if tool['function']['name'] == function_name), None)
                        say = tool_data['function']['say']

                        await self.emit('llmreply', {
                            "partialResponseIndex": None,
                            "partialResponse": say
                        }, interaction_count)

                        # Pass the call context first — tools expect (context, args).
                        function_response = await function_to_call(self.context, function_args)

                        logger.info(f"Function {function_name} called with args: {function_args}")

                        if function_name != "end_call":
                            await self.completion(function_response, interaction_count, 'function', function_name)

                # Emit any remaining content in the buffer
                if self.sentence_buffer.strip():
                    await self.emit('llmreply', {
                        "partialResponseIndex": self.partial_response_index,
                        "partialResponse": self.sentence_buffer.strip()
                    }, interaction_count)
                    self.sentence_buffer = ""

                final_message = await stream.get_final_message()
                self.user_context.append({"role": "assistant", "content": final_message.content[0].text})

        except Exception as e:
            logger.error(f"Error in AnthropicService completion: {str(e)}")
248
+
249
class LLMFactory:
    """Create the concrete LLM service selected by configuration."""

    @staticmethod
    def get_llm_service(service_name: str, context: CallContext) -> AbstractLLMService:
        """Return an LLM service for *service_name* ('openai' or 'anthropic').

        Raises:
            ValueError: if the name matches no known backend.
        """
        backends = {
            "openai": OpenAIService,
            "anthropic": AnthropicService,
        }
        key = service_name.lower()
        if key not in backends:
            raise ValueError(f"Unsupported LLM service: {service_name}")
        return backends[key](context)
services/stream_service.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from typing import Dict
3
+
4
+ from fastapi import WebSocket
5
+
6
+ from logger_config import get_logger
7
+ from services.event_emmiter import EventEmitter
8
+
9
+ logger = get_logger("Stream")
10
+
11
class StreamService(EventEmitter):
    """Delivers TTS audio chunks to Twilio over the WebSocket, in order.

    Chunks may arrive out of order from the TTS service; they are held in
    a buffer keyed by index and flushed as soon as the next expected index
    is available. After each chunk a 'mark' message is sent so Twilio can
    confirm playback, and an 'audiosent' event is emitted with its label.
    """

    def __init__(self, websocket: WebSocket):
        super().__init__()
        self.ws = websocket
        self.expected_audio_index = 0
        # index -> base64 audio payload waiting for its turn
        self.audio_buffer: Dict[int, str] = {}
        self.stream_sid = ''

    def set_stream_sid(self, stream_sid: str):
        """Record the Twilio stream SID used to address outgoing messages."""
        self.stream_sid = stream_sid

    async def buffer(self, index: int, audio: str):
        """Queue *audio* chunk *index* for ordered delivery.

        A None index (e.g. canned tool phrases) bypasses ordering entirely.
        """
        if index is None:
            await self.send_audio(audio)
            return
        if index != self.expected_audio_index:
            # Out of order: park it until its predecessors arrive.
            self.audio_buffer[index] = audio
            return
        await self.send_audio(audio)
        self.expected_audio_index += 1
        # Flush any consecutive chunks that were waiting on this one.
        while self.expected_audio_index in self.audio_buffer:
            pending = self.audio_buffer.pop(self.expected_audio_index)
            await self.send_audio(pending)
            self.expected_audio_index += 1

    def reset(self):
        """Drop buffered audio and restart ordering (used on interruption)."""
        self.expected_audio_index = 0
        self.audio_buffer = {}

    async def send_audio(self, audio: str):
        """Send one media frame followed by a uniquely-labelled mark message."""
        await self.ws.send_json({
            "streamSid": self.stream_sid,
            "event": "media",
            "media": {
                "payload": audio
            }
        })

        mark_label = str(uuid.uuid4())

        await self.ws.send_json({
            "streamSid": self.stream_sid,
            "event": "mark",
            "mark": {
                "name": mark_label
            }
        })

        await self.emit('audiosent', mark_label)
services/transcription_service.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from deepgram import DeepgramClient, LiveOptions, LiveTranscriptionEvents
4
+
5
+ from logger_config import get_logger
6
+ from services.event_emmiter import EventEmitter
7
+
8
+ logger = get_logger("Transcription")
9
+
10
class TranscriptionService(EventEmitter):
    """Streams Twilio mulaw audio to Deepgram live transcription.

    Events emitted:
        'transcription' — a finalized utterance (str).
        'utterance'     — interim (non-final) text plus the stream SID.
    """

    def __init__(self):
        super().__init__()
        self.client = DeepgramClient(os.getenv("DEEPGRAM_API_KEY"))
        self.deepgram_live = None
        # Accumulates is_final (but not yet speech_final) fragments.
        self.final_result = ""
        self.speech_final = False
        self.stream_sid = None
        # BUG FIX: is_connected was read/assigned by the error/close
        # handlers and disconnect() but never initialized here.
        self.is_connected = False

    def set_stream_sid(self, stream_id):
        self.stream_sid = stream_id

    def get_stream_sid(self):
        return self.stream_sid

    async def connect(self):
        """Open the Deepgram live socket and register event handlers."""
        self.deepgram_live = self.client.listen.asynclive.v("1")
        await self.deepgram_live.start(LiveOptions(
            model="nova-2",
            language="en-US",
            encoding="mulaw",      # Twilio media streams are 8kHz mulaw
            sample_rate=8000,
            channels=1,
            punctuate=True,
            interim_results=True,
            endpointing=200,
            utterance_end_ms=1000
        ))
        self.is_connected = True

        self.deepgram_live.on(LiveTranscriptionEvents.Transcript, self.handle_transcription)
        self.deepgram_live.on(LiveTranscriptionEvents.Error, self.handle_error)
        self.deepgram_live.on(LiveTranscriptionEvents.Close, self.handle_close)
        self.deepgram_live.on(LiveTranscriptionEvents.Warning, self.handle_warning)
        self.deepgram_live.on(LiveTranscriptionEvents.Metadata, self.handle_metadata)
        self.deepgram_live.on(LiveTranscriptionEvents.UtteranceEnd, self.handle_utterance_end)

    async def handle_utterance_end(self, self_obj, utterance_end):
        """Flush accumulated text when the utterance ends before a
        speech_final transcript arrived."""
        try:
            if self.speech_final:
                return
            logger.info(f"UtteranceEnd received before speech was final, emit the text collected so far: {self.final_result}")
            await self.emit('transcription', self.final_result)
            self.final_result = ''
            self.speech_final = True
        except Exception as e:
            # BUG FIX: Exception has no print_stack(); log the traceback
            # via exc_info instead.
            logger.error(f"Error while handling utterance end: {e}", exc_info=True)

    async def handle_transcription(self, self_obj, result):
        """Route Deepgram transcript events.

        is_final fragments accumulate into final_result; once speech_final
        is seen the whole utterance is emitted as 'transcription'. Interim
        text is forwarded as 'utterance' events with the stream SID.
        """
        try:
            alternatives = result.channel.alternatives if hasattr(result, 'channel') else []
            text = alternatives[0].transcript if alternatives else ""

            if result.is_final and text.strip():
                self.final_result += f" {text}"
                if result.speech_final:
                    self.speech_final = True
                    await self.emit('transcription', self.final_result)
                    self.final_result = ''
                else:
                    # Final fragment, but the speaker may continue.
                    self.speech_final = False
            elif text.strip():
                await self.emit('utterance', text, self.stream_sid)
        except Exception as e:
            # BUG FIX: Exception has no print_stack(); log the traceback
            # via exc_info instead.
            logger.error(f"Error while handling transcription: {e}", exc_info=True)

    async def handle_error(self, self_obj, error):
        logger.error(f"Deepgram error: {error}")
        self.is_connected = False

    async def handle_warning(self, self_obj, warning):
        # BUG FIX: positional args after the message are %-format args for
        # logging and raised a formatting error; interpolate explicitly.
        logger.info(f"Deepgram warning: {warning}")

    async def handle_metadata(self, self_obj, metadata):
        # BUG FIX: same print-style logging misuse as handle_warning.
        logger.info(f"Deepgram metadata: {metadata}")

    async def handle_close(self, self_obj, close):
        logger.info("Deepgram connection closed")
        self.is_connected = False

    async def send(self, payload: bytes):
        """Forward a raw mulaw payload to Deepgram, if connected."""
        if self.deepgram_live:
            await self.deepgram_live.send(payload)

    async def disconnect(self):
        """Close the Deepgram socket and clear connection state."""
        if self.deepgram_live:
            await self.deepgram_live.finish()
        self.deepgram_live = None
        self.is_connected = False
        logger.info("Disconnected from Deepgram")
services/tts_service.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import base64
3
+ import os
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Dict
6
+
7
+ import aiohttp
8
+ import numpy as np
9
+ from deepgram import DeepgramClient, LiveOptions
10
+ from dotenv import load_dotenv
11
+
12
+ from logger_config import get_logger
13
+ from services.event_emmiter import EventEmitter
14
+
15
+ load_dotenv()
16
+ logger = get_logger("TTS")
17
+
18
+
19
class AbstractTTSService(EventEmitter, ABC):
    """Interface every TTS backend must implement.

    Implementations synthesize each partial LLM reply and publish the
    resulting audio via 'speech' events.
    """

    @abstractmethod
    async def generate(self, llm_reply: Dict[str, Any], interaction_count: int):
        """Synthesize *llm_reply* and emit the audio as a 'speech' event."""

    @abstractmethod
    async def set_voice(self, voice_id: str):
        """Select the voice used for subsequent synthesis."""

    @abstractmethod
    async def disconnect(self):
        """Release any resources held by the backend."""
31
+
32
class ElevenLabsTTS(AbstractTTSService):
    """TTS backend using the ElevenLabs streaming HTTP API.

    Emits 'speech' events carrying base64-encoded ulaw_8000 audio suitable
    for a Twilio media stream. Voice, API key and model come from the
    ELEVENLABS_* environment variables.
    """

    def __init__(self):
        super().__init__()
        self.voice_id = os.getenv("ELEVENLABS_VOICE_ID")
        self.api_key = os.getenv("ELEVENLABS_API_KEY")
        self.model_id = os.getenv("ELEVENLABS_MODEL_ID")
        self.speech_buffer = {}

    async def set_voice(self, voice_id):
        """Select the ElevenLabs voice for subsequent requests.

        BUG FIX: was a sync method although AbstractTTSService declares it
        async (and DeepgramTTS implements it async); callers awaiting the
        interface would have failed.
        """
        self.voice_id = voice_id

    async def disconnect(self):
        # ElevenLabs client doesn't require explicit disconnection
        return

    async def generate(self, llm_reply: Dict[str, Any], interaction_count: int):
        """Convert one partial LLM reply to speech and emit a 'speech' event.

        Args:
            llm_reply: dict with 'partialResponseIndex' and 'partialResponse'.
            interaction_count: conversation turn counter, passed through
                unchanged on the emitted event.
        """
        partial_response_index = llm_reply['partialResponseIndex']
        partial_response = llm_reply['partialResponse']

        if not partial_response:
            return

        try:
            output_format = "ulaw_8000"
            url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}/stream"
            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json",
                "Accept": "audio/wav"
            }
            params = {
                "output_format": output_format,
                "optimize_streaming_latency": 4
            }
            data = {
                "model_id": self.model_id,
                "text": partial_response
            }

            async with aiohttp.ClientSession() as session:
                async with session.post(url, headers=headers, params=params, json=data) as response:
                    if response.status == 200:
                        audio_content = await response.read()
                        audio_base64 = base64.b64encode(audio_content).decode('utf-8')
                        await self.emit('speech', partial_response_index, audio_base64, partial_response, interaction_count)
                    else:
                        # BUG FIX: non-200 responses were silently dropped,
                        # leaving no trace of the failed synthesis.
                        error_body = await response.text()
                        logger.error(f"ElevenLabs TTS request failed with status {response.status}: {error_body}")
        except Exception as err:
            logger.error("Error occurred in ElevenLabs TTS service", exc_info=True)
            logger.error(str(err))
81
+
82
+
83
class DeepgramTTS(AbstractTTSService):
    """TTS backend using Deepgram's Aura speak API (mulaw @ 8kHz).

    Emits 'speech' events carrying base64-encoded mulaw audio suitable for
    a Twilio media stream.
    """

    def __init__(self):
        super().__init__()
        self.client = DeepgramClient(os.getenv("DEEPGRAM_API_KEY"))

    async def generate(self, llm_reply, interaction_count):
        """Convert one partial LLM reply to speech and emit a 'speech' event.

        Args:
            llm_reply: dict with 'partialResponseIndex' and 'partialResponse'.
            interaction_count: conversation turn counter, passed through
                unchanged on the emitted event.
        """
        partial_response_index = llm_reply['partialResponseIndex']
        partial_response = llm_reply['partialResponse']

        if not partial_response:
            return

        try:
            # FIX: removed an unused local `source` dict that shadowed the
            # inline source actually passed to the API call below.
            options = {
                "model": "aura-asteria-en",
                "encoding": "mulaw",
                "sample_rate": 8000
            }

            response = await self.client.asyncspeak.v("1").stream(
                source={"text": partial_response},
                options=options
            )

            if response.stream:
                audio_content = response.stream.getvalue()

                # Convert audio to numpy array
                audio_array = np.frombuffer(audio_content, dtype=np.uint8)

                # Trim the first 10ms (80 samples at 8000Hz) to remove the initial noise
                trim_samples = 80
                trimmed_audio = audio_array[trim_samples:]

                # Convert back to bytes
                trimmed_audio_bytes = trimmed_audio.tobytes()

                audio_base64 = base64.b64encode(trimmed_audio_bytes).decode('utf-8')
                await self.emit('speech', partial_response_index, audio_base64, partial_response, interaction_count)
            else:
                logger.error("Error in TTS generation: No audio stream returned")

        except Exception as e:
            logger.error(f"Error in TTS generation: {str(e)}")

    async def set_voice(self, voice_id):
        logger.info(f"Attempting to set voice to {voice_id}, but Deepgram TTS doesn't support direct voice selection.")
        # TODO(akiani): Implement voice selection in Deepgram TTS

    async def disconnect(self):
        # Deepgram client doesn't require explicit disconnection
        logger.info("DeepgramTTS service disconnected")
140
+
141
+
142
class TTSFactory:
    """Builds the TTS backend selected by a (case-insensitive) name."""

    @staticmethod
    def get_tts_service(service_name: str) -> "AbstractTTSService":
        """Return a new TTS service instance for *service_name*.

        Raises:
            ValueError: if the name is not a supported service.
        """
        normalized = service_name.lower()
        if normalized == "elevenlabs":
            return ElevenLabsTTS()
        if normalized == "deepgram":
            return DeepgramTTS()
        raise ValueError(f"Unsupported TTS service: {service_name}")
151
+
152
# Usage in your main application
# NOTE: this runs at import time, so merely importing this module
# instantiates the configured backend (and raises ValueError for an
# unsupported TTS_SERVICE name).
tts_service_name = os.getenv("TTS_SERVICE", "deepgram")  # Default to deepgram if not specified
tts_service = TTSFactory.get_tts_service(tts_service_name)
start.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Launch the FastAPI backend and the Streamlit frontend in one container.

# Public port for the Streamlit UI (Hugging Face Spaces expects 7860).
export PORT=${PORT:-7860}

# Start the FastAPI backend in the background on the internal port 8000.
uvicorn app:app --host 0.0.0.0 --port 8000 &

# Give the backend a moment to come up before the frontend starts polling it.
sleep 5

# Start Streamlit on $PORT in the foreground (keeps the container alive).
# FIX: quote "$PORT" so an empty/odd value cannot break argument parsing.
streamlit run streamlit_app.py --server.port "$PORT" --server.address 0.0.0.0
streamlit_app.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import requests
4
+ import streamlit as st
5
+ import dotenv
6
+
7
+ dotenv.load_dotenv(verbose=True)
8
+
9
+ st.set_page_config(page_title="Guilherme Favaron - AI Dialer", page_icon="📞", layout="wide")
10
+
11
def get_api_url():
    """Return the backend base URL for the current environment.

    On Hugging Face Spaces (SPACE_ID set) the FastAPI backend is reached
    internally on port 8000; in local development the port comes from the
    PORT environment variable (defaulting to 8000).
    """
    if os.getenv('SPACE_ID'):
        # Internal communication inside the Spaces container
        return "http://localhost:8000"

    # Local development
    local_port = os.getenv('PORT', '8000')
    return f"http://localhost:{local_port}"
19
+
20
def make_request(method, endpoint, **kwargs):
    """Send an HTTP request to the FastAPI backend and return parsed JSON.

    Args:
        method: HTTP verb, e.g. 'GET' or 'POST'.
        endpoint: Path on the backend; a leading slash is added if missing.
        **kwargs: Forwarded to requests.request (json=, timeout=, ...).
            A special 'debug' key (not a requests argument) additionally
            surfaces the attempted URL in the error message on failure.

    Returns:
        The decoded JSON body, or None when the request fails (the error
        is shown in the Streamlit UI instead of raising).
    """
    # BUG FIX: 'debug' is our own flag, not a requests.request() parameter.
    # It was previously left inside kwargs (and read with kwargs.get in the
    # error path), so passing debug=True would make requests.request raise
    # TypeError on the unexpected keyword. Pop it before forwarding.
    debug = kwargs.pop('debug', False)

    base_url = get_api_url()
    # Make sure the endpoint starts with a slash
    if not endpoint.startswith('/'):
        endpoint = f"/{endpoint}"
    url = f"{base_url}{endpoint}"

    try:
        response = requests.request(method, url, **kwargs)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        st.error(f"Error connecting to {endpoint}: {str(e)}")
        if debug:
            st.error(f"Attempted URL: {url}")
        return None
37
+
38
def init_session_state():
    """Seed Streamlit session state on the first run of a browser session.

    Subsequent reruns find 'call_active' already present and skip
    re-initialisation, preserving user state across refreshes.
    """
    if 'call_active' in st.session_state:
        return

    st.session_state.call_active = False
    st.session_state.call_sid = None
    st.session_state.transcript = []
    st.session_state.system_message = os.getenv("SYSTEM_MESSAGE")
    st.session_state.initial_message = os.getenv("INITIAL_MESSAGE")
    st.session_state.all_transcripts = fetch_all_transcripts()
    st.session_state.recording_info = None
    st.session_state.call_selector = "Current Call"
48
+
49
def handle_call_start(phone_number):
    """Initiate an outbound call through the backend and wait for connection.

    Posts the phone number plus the current system/initial messages to
    /start_call, then polls /call_status once per second for up to 60
    seconds until the call reaches 'in-progress' (success), a terminal
    status, or the timeout elapses. Progress and errors are surfaced
    through the Streamlit UI.
    """
    with st.spinner(f"Calling {phone_number}..."):
        payload = {
            "to_number": phone_number,
            "system_message": st.session_state.system_message,
            "initial_message": st.session_state.initial_message
        }
        result = make_request('POST', '/start_call', json=payload, timeout=10)

        if not result:
            # make_request already showed the connection error in the UI.
            return

        if call_sid := result.get('call_sid'):
            st.session_state.call_sid = call_sid
            st.session_state.transcript = []
            st.success(f"Call initiated. SID: {call_sid}")

            # Poll once per second, up to 60 attempts.
            for _ in range(60):
                time.sleep(1)
                status_result = make_request('GET', f'/call_status/{call_sid}')
                if not status_result:
                    # Transient request failure: keep polling.
                    continue

                status = status_result.get('status')
                if status == 'in-progress':
                    st.session_state.call_active = True
                    st.session_state.call_selector = "Current Call"
                    break
                if status in ['completed', 'failed', 'busy', 'no-answer']:
                    # Terminal Twilio statuses: the call will never connect.
                    st.error(f"Call ended: {status}")
                    break
            else:
                # for/else: reached only when no break fired in 60 polls.
                st.error("Timeout waiting for call to connect.")
        else:
            st.error(f"Failed to initiate call: {result}")
84
+
85
def handle_call_end():
    """Ask the backend to terminate the active call and reset local state."""
    payload = {"call_sid": st.session_state.call_sid}
    result = make_request('POST', '/end_call', json=payload)

    ended_ok = bool(result) and result.get('status') == 'success'
    if ended_ok:
        st.success("Call ended successfully.")
        st.session_state.call_active = False
        st.session_state.call_sid = None
        st.rerun()
92
+
93
def on_call_selector_change():
    """Refresh recording info when the call-selector widget changes."""
    selection = st.session_state.call_selector

    if selection == "Current Call":
        # The live call has no stored recording to fetch.
        st.session_state.recording_info = None
        return

    matching_transcript = None
    for candidate in st.session_state.all_transcripts:
        if f"Call {candidate['call_sid']}" == selection:
            matching_transcript = candidate
            break

    if matching_transcript:
        st.session_state.recording_info = fetch_recording_info(matching_transcript['call_sid'])
    else:
        st.warning("No transcript found for the selected call.")
105
+
106
def update_call_info():
    """Poll the backend for the current call's status and transcript.

    Mirrors the latest transcript into session state. Returns True while
    the call is still live; False once it has ended or a request failed.
    """
    sid = st.session_state.call_sid

    status_result = make_request('GET', f'/call_status/{sid}')
    if not status_result:
        return False

    status = status_result.get('status')
    if status not in ('in-progress', 'ringing'):
        # Any other Twilio status means the call is over.
        st.session_state.call_active = False
        st.warning(f"Call ended: {status}")
        return False

    transcript_result = make_request('GET', f'/transcript/{sid}')
    if not transcript_result:
        return False

    if transcript_result.get('call_ended', False):
        st.session_state.call_active = False
        st.info(f"Call ended. Status: {transcript_result.get('final_status', 'Unknown')}")
        return False

    st.session_state.transcript = transcript_result.get('transcript', [])
    return True
128
+
129
def main():
    """Render the Streamlit UI and drive the refresh loop.

    Sidebar: call controls (phone number, prompt configuration, start/end
    buttons) and a selector for historical calls. Main area: the recording
    player and the transcript of either the live call or a selected past
    call. While a call is active the page reruns once per second to pull
    fresh transcript data.
    """
    init_session_state()

    with st.sidebar:
        st.markdown(
            "<h2 style='text-align: center; font-size: 2.5em;'>📞 Guilherme Favaron - AI Dialer</h2>",
            unsafe_allow_html=True
        )
        st.divider()

        phone_number = display_call_interface()

        # Prompt configuration is locked while a call is in progress.
        st.session_state.system_message = st.text_area(
            "System Message",
            value=st.session_state.system_message,
            disabled=st.session_state.call_active
        )
        st.session_state.initial_message = st.text_area(
            "Initial Message",
            value=st.session_state.initial_message,
            disabled=st.session_state.call_active
        )

        start_call = st.button("Start Call", disabled=st.session_state.call_active)
        end_call = st.button("End Call", disabled=not st.session_state.call_active)

        if start_call and phone_number:
            handle_call_start(phone_number)
        elif start_call:
            st.warning("Please enter a valid phone number.")

        if end_call:
            handle_call_end()

        if st.session_state.call_active:
            st.success("Call in progress")
        st.divider()

        st.selectbox(
            "Select a call",
            options=["Current Call"] + [f"Call {t['call_sid']}" for t in st.session_state.all_transcripts],
            key="call_selector",
            index=0,
            disabled=st.session_state.call_active,
            on_change=on_call_selector_change
        )

        if st.button("Refresh Call List"):
            transcripts = fetch_all_transcripts()
            if transcripts is not None:
                st.session_state.all_transcripts = transcripts
            # Re-resolve recording info against the refreshed list.
            on_call_selector_change()

    st.divider()

    with st.spinner("Loading recording and transcript..."):
        # Recording player: only for historical calls with a known recording.
        if st.session_state.call_selector != "Current Call" and st.session_state.recording_info:
            st.subheader("Call Recording")
            audio_url = st.session_state.recording_info['url']
            st.audio(audio_url, format="audio/mp3", start_time=0)
            st.divider()

        if st.session_state.call_active and st.session_state.call_sid:
            # Live call: render the transcript polled into session state.
            st.subheader(f"Transcript for Current Call {st.session_state.call_sid}")
            for entry in st.session_state.transcript:
                if entry['role'] == 'user':
                    st.chat_message("user").write(entry['content'])
                elif entry['role'] == 'assistant':
                    st.chat_message("assistant").write(entry['content'])
        elif st.session_state.call_selector != "Current Call":
            # Historical call: look up its stored transcript by SID label.
            selected_transcript = next(
                (t for t in st.session_state.all_transcripts if f"Call {t['call_sid']}" == st.session_state.call_selector),
                None
            )
            if selected_transcript:
                st.subheader(f"Transcript for {st.session_state.call_selector}")
                for entry in selected_transcript['transcript']:
                    if entry['role'] == 'user':
                        st.chat_message("user").write(entry['content'])
                    elif entry['role'] == 'assistant':
                        st.chat_message("assistant").write(entry['content'])

    if st.session_state.call_active:
        if update_call_info():
            # Still live: rerun after a second to refresh the transcript.
            time.sleep(1)
            st.rerun()
        else:
            # Call ended (or polling failed): clear state and rerun once.
            st.session_state.call_active = False
            st.session_state.call_sid = None
            st.sidebar.info("Call has ended. You can start a new call if needed.")
            st.rerun()

if __name__ == "__main__":
    main()