Spaces:

mgbam
/

rentbot

Runtime error

App Files Community

mgbam committed on Jul 18, 2025

Commit

db431a4

verified ·

1 Parent(s): 9a4c62a

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -14

app.py CHANGED Viewed

@@ -14,19 +14,32 @@ from llm_handler import get_llm_response
 from tts_handler import text_to_speech_stream
 from tool_handler import execute_tool_call
-# Load environment variables
 load_dotenv()
 app = FastAPI()
-# Configuration
 SILENCE_THRESHOLD_SECONDS = 0.7
 AUDIO_RATE = 8000  # Hz for Twilio media streams
 AUDIO_BUFFER_SIZE = int(SILENCE_THRESHOLD_SECONDS * AUDIO_RATE)
-# In-memory session storage (for demonstration)
 sessions = {}
 @app.websocket("/rentbot")
 async def websocket_endpoint(ws: WebSocket):
     await ws.accept()
@@ -69,7 +82,7 @@ async def websocket_endpoint(ws: WebSocket):
                 audio_buffer = np.append(audio_buffer, chunk_pcm)
                 if len(audio_buffer) >= AUDIO_BUFFER_SIZE:
-                    if sessions[stream_sid]["processing_task"] and not sessions[stream_sid]["processing_task"].done():
                         continue
                     task = asyncio.create_task(process_user_audio(ws, stream_sid, audio_buffer))
                     sessions[stream_sid]["processing_task"] = task
@@ -77,8 +90,8 @@ async def websocket_endpoint(ws: WebSocket):
             elif data['event'] == 'mark':
                 if not stream_sid: continue
-                if len(audio_buffer) > 1000:
-                    if not (sessions[stream_sid]["processing_task"] and not sessions[stream_sid]["processing_task"].done()):
                         task = asyncio.create_task(process_user_audio(ws, stream_sid, audio_buffer))
                         sessions[stream_sid]["processing_task"] = task
                         audio_buffer = np.array([], dtype=np.int16)
@@ -90,19 +103,21 @@ async def websocket_endpoint(ws: WebSocket):
     except WebSocketDisconnect:
         print(f"WebSocket disconnected for stream {stream_sid}")
     except Exception as e:
-        print(f"An error occurred: {e}")
     finally:
         if stream_sid and stream_sid in sessions:
-            if sessions[stream_sid]["processing_task"]:
                 sessions[stream_sid]["processing_task"].cancel()
             del sessions[stream_sid]
         print(f"Session cleaned up for stream {stream_sid}")
 async def process_user_audio(ws: WebSocket, stream_sid: str, audio_chunk: np.ndarray):
     """The main logic loop: STT -> LLM -> (Tool/TTS)"""
     print(f"[{stream_sid}] Processing audio chunk of size {len(audio_chunk)}...")
     user_text = await transcribe_audio_chunk(audio_chunk)
     if not user_text:
         print(f"[{stream_sid}] No text transcribed.")
@@ -111,26 +126,28 @@ async def process_user_audio(ws: WebSocket, stream_sid: str, audio_chunk: np.nda
     print(f"[{stream_sid}] User said: {user_text}")
     sessions[stream_sid]["messages"].append({"role": "user", "content": user_text})
     tts_queue = asyncio.Queue()
-    async def llm_chunk_handler(chunk):
-        await tts_queue.put(chunk)
     async def tts_text_iterator():
         while True:
             chunk = await tts_queue.get()
             if chunk is None: break
             yield chunk
     llm_task = asyncio.create_task(get_llm_response(sessions[stream_sid]["messages"], llm_chunk_handler))
     tts_task = asyncio.create_task(stream_and_send_audio(ws, stream_sid, tts_text_iterator()))
     assistant_message, tool_calls = await llm_task
-    await tts_queue.put(None)
-    await tts_task
     if assistant_message and assistant_message.get("content"):
        sessions[stream_sid]["messages"].append(assistant_message)
     if tool_calls:
         sessions[stream_sid]["messages"].append(assistant_message)
@@ -144,6 +161,7 @@ async def process_user_audio(ws: WebSocket, stream_sid: str, audio_chunk: np.nda
             tool_result_message = execute_tool_call(tool_call)
             sessions[stream_sid]["messages"].append(tool_result_message)
         final_tts_queue = asyncio.Queue()
         async def final_llm_chunk_handler(chunk): await final_tts_queue.put(chunk)
         async def final_tts_iterator():
@@ -178,9 +196,10 @@ async def stream_and_send_audio(ws: WebSocket, stream_sid: str, text_iterator):
     print(f"[{stream_sid}] Finished sending bot's audio turn.")
 if __name__ == "__main__":
     import uvicorn
     # Hugging Face Spaces expects the app to run on port 7860
     port = int(os.environ.get("PORT", 7860))
-    print(f"Starting RentBot server on port {port}...")
     uvicorn.run(app, host="0.0.0.0", port=port)

 from tts_handler import text_to_speech_stream
 from tool_handler import execute_tool_call
+# Load environment variables from .env file
 load_dotenv()
+# Initialize FastAPI application
 app = FastAPI()
+# --- Add a root endpoint for health checks and basic info ---
+@app.get("/")
+async def root():
+    """
+    A simple GET endpoint to confirm the server is running and provide info.
+    This is what you see when you visit the Hugging Face Space URL in a browser.
+    """
+    return {"status": "running", "message": "RentBot is active. Connect via WebSocket at the /rentbot endpoint."}
+# --- Global Configuration ---
 SILENCE_THRESHOLD_SECONDS = 0.7
 AUDIO_RATE = 8000  # Hz for Twilio media streams
 AUDIO_BUFFER_SIZE = int(SILENCE_THRESHOLD_SECONDS * AUDIO_RATE)
+# In-memory session storage (for demonstration). In production, use Redis or a database.
 sessions = {}
+# --- Main WebSocket Endpoint for Twilio ---
 @app.websocket("/rentbot")
 async def websocket_endpoint(ws: WebSocket):
     await ws.accept()
                 audio_buffer = np.append(audio_buffer, chunk_pcm)
                 if len(audio_buffer) >= AUDIO_BUFFER_SIZE:
+                    if sessions[stream_sid].get("processing_task") and not sessions[stream_sid]["processing_task"].done():
                         continue
                     task = asyncio.create_task(process_user_audio(ws, stream_sid, audio_buffer))
                     sessions[stream_sid]["processing_task"] = task
             elif data['event'] == 'mark':
                 if not stream_sid: continue
+                if len(audio_buffer) > 1000: # Heuristic to process leftover audio on pause
+                    if not (sessions[stream_sid].get("processing_task") and not sessions[stream_sid]["processing_task"].done()):
                         task = asyncio.create_task(process_user_audio(ws, stream_sid, audio_buffer))
                         sessions[stream_sid]["processing_task"] = task
                         audio_buffer = np.array([], dtype=np.int16)
     except WebSocketDisconnect:
         print(f"WebSocket disconnected for stream {stream_sid}")
     except Exception as e:
+        print(f"An error occurred in websocket_endpoint: {e}")
     finally:
         if stream_sid and stream_sid in sessions:
+            if sessions[stream_sid].get("processing_task"):
                 sessions[stream_sid]["processing_task"].cancel()
             del sessions[stream_sid]
         print(f"Session cleaned up for stream {stream_sid}")
+# --- Core Logic Functions ---
 async def process_user_audio(ws: WebSocket, stream_sid: str, audio_chunk: np.ndarray):
     """The main logic loop: STT -> LLM -> (Tool/TTS)"""
     print(f"[{stream_sid}] Processing audio chunk of size {len(audio_chunk)}...")
+    # 1. Speech-to-Text
     user_text = await transcribe_audio_chunk(audio_chunk)
     if not user_text:
         print(f"[{stream_sid}] No text transcribed.")
     print(f"[{stream_sid}] User said: {user_text}")
     sessions[stream_sid]["messages"].append({"role": "user", "content": user_text})
+    # Queue to pass text from LLM to TTS
     tts_queue = asyncio.Queue()
+    async def llm_chunk_handler(chunk): await tts_queue.put(chunk)
     async def tts_text_iterator():
         while True:
             chunk = await tts_queue.get()
             if chunk is None: break
             yield chunk
+    # 2. Start LLM and TTS tasks concurrently for low latency
     llm_task = asyncio.create_task(get_llm_response(sessions[stream_sid]["messages"], llm_chunk_handler))
     tts_task = asyncio.create_task(stream_and_send_audio(ws, stream_sid, tts_text_iterator()))
+    # Wait for LLM to finish and get final message object
     assistant_message, tool_calls = await llm_task
+    await tts_queue.put(None) # Signal TTS to end
+    await tts_task # Wait for TTS to finish sending audio
     if assistant_message and assistant_message.get("content"):
        sessions[stream_sid]["messages"].append(assistant_message)
+    # 3. Handle Tool Calls if any
     if tool_calls:
         sessions[stream_sid]["messages"].append(assistant_message)
             tool_result_message = execute_tool_call(tool_call)
             sessions[stream_sid]["messages"].append(tool_result_message)
+        # 4. Get a final response from the LLM after executing the tool
         final_tts_queue = asyncio.Queue()
         async def final_llm_chunk_handler(chunk): await final_tts_queue.put(chunk)
         async def final_tts_iterator():
     print(f"[{stream_sid}] Finished sending bot's audio turn.")
+# --- Application Entry Point ---
 if __name__ == "__main__":
     import uvicorn
     # Hugging Face Spaces expects the app to run on port 7860
     port = int(os.environ.get("PORT", 7860))
+    print(f"Starting RentBot server on host 0.0.0.0 and port {port}...")
     uvicorn.run(app, host="0.0.0.0", port=port)