Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed on Nov 13, 2025

Commit

cc93a2a

verified ·

1 Parent(s): 6e7d07a

Update services/sambanova_voice_service.py

Browse files

Files changed (1) hide show

services/sambanova_voice_service.py +74 -87

services/sambanova_voice_service.py CHANGED Viewed

@@ -12,13 +12,14 @@ from fastrtc import (
     ReplyOnStopWords,
     Stream,
     get_stt_model,
-    get_twilio_turn_credentials,
 )
 from gradio.utils import get_space
 from pydantic import BaseModel
 class SambanovaVoiceService:
-    """Dịch vụ Voice AI với Sambanova API"""
     def __init__(self):
         self.curr_dir = Path(__file__).parent
@@ -32,12 +33,21 @@ class SambanovaVoiceService:
         # STT model
         self.model = get_stt_model()
-        # RTC configuration
-        self.rtc_configuration = get_twilio_turn_credentials() if get_space() else None
-        # FastAPI app
-        self.app = FastAPI()
     def create_response_handler(self):
         """Tạo response handler cho voice streaming"""
@@ -49,33 +59,49 @@ class SambanovaVoiceService:
             gradio_chatbot = gradio_chatbot or []
             conversation_state = conversation_state or []
-            # Speech to Text
-            text = self.model.stt(audio)
-            print("🎤 STT Result:", text)
-            # Thêm audio vào chatbot
-            sample_rate, array = audio
-            gradio_chatbot.append(
-                {"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))}
-            )
-            yield AdditionalOutputs(gradio_chatbot, conversation_state)
-            # Thêm text vào conversation state
-            conversation_state.append({"role": "user", "content": text})
-            # Gọi Sambanova API
-            request = self.client.chat.completions.create(
-                model="Meta-Llama-3.2-3B-Instruct",
-                messages=conversation_state,
-                temperature=0.1,
-                top_p=0.1,
-            )
-            response_content = {"role": "assistant", "content": request.choices[0].message.content}
-            conversation_state.append(response_content)
-            gradio_chatbot.append(response_content)
-            yield AdditionalOutputs(gradio_chatbot, conversation_state)
         return response
@@ -86,65 +112,26 @@ class SambanovaVoiceService:
         return Stream(
             ReplyOnStopWords(
                 response_handler,
-                stop_words=["computer", "hey", "hello", "xin chào"],
                 input_sample_rate=16000,
             ),
             mode="send",
             modality="audio",
-            additional_inputs=[gr.Chatbot(type="messages", value=[]), gr.State(value=[])],
-            additional_outputs=[gr.Chatbot(type="messages", value=[]), gr.State(value=[])],
-            additional_outputs_handler=lambda *a: (a[2], a[3]),
-            concurrency_limit=5 if get_space() else None,
-            time_limit=90 if get_space() else None,
             rtc_configuration=self.rtc_configuration,
-        )
-    def setup_fastapi_routes(self):
-        """Thiết lập FastAPI routes"""
-        class Message(BaseModel):
-            role: str
-            content: str
-        class InputData(BaseModel):
-            webrtc_id: str
-            chatbot: list[Message]
-            state: list[Message]
-        @self.app.get("/")
-        async def home():
-            rtc_config = get_twilio_turn_credentials() if get_space() else None
-            html_content = (self.curr_dir / "templates" / "sambanova_index.html").read_text()
-            html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
-            return HTMLResponse(content=html_content)
-        @self.app.post("/input_hook")
-        async def input_hook(data: InputData):
-            body = data.model_dump()
-            # stream.set_input(data.webrtc_id, body["chatbot"], body["state"])
-            return {"status": "ok"}
-        def audio_to_base64(file_path):
-            audio_format = "wav"
-            with open(file_path, "rb") as audio_file:
-                encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
-            return f"data:audio/{audio_format};base64,{encoded_audio}"
-        @self.app.get("/outputs")
-        async def outputs(webrtc_id: str):
-            async def output_stream():
-                # async for output in stream.output_stream(webrtc_id):
-                #     chatbot = output.args[0]
-                #     state = output.args[1]
-                #     data = {
-                #         "message": state[-1],
-                #         "audio": audio_to_base64(chatbot[-1]["content"].value["path"])
-                #         if chatbot[-1]["role"] == "user"
-                #         else None,
-                #     }
-                #     yield f"event: output\ndata: {json.dumps(data)}\n\n"
-                yield f"event: output\ndata: {json.dumps({'message': 'Stream ready'})}\n\n"
-            return StreamingResponse(output_stream(), media_type="text/event-stream")
-        return self.app

     ReplyOnStopWords,
     Stream,
     get_stt_model,
+    get_cloudflare_turn_credentials_async,  # Sử dụng Cloudflare free
 )
 from gradio.utils import get_space
 from pydantic import BaseModel
+import asyncio
 class SambanovaVoiceService:
+    """Dịch vụ Voice AI với Sambanova API - Fixed TURN issue"""
     def __init__(self):
         self.curr_dir = Path(__file__).parent
         # STT model
         self.model = get_stt_model()
+        # RTC configuration - Sử dụng Cloudflare free hoặc None
+        self.rtc_configuration = asyncio.run(self._get_turn_config())
+        print("✅ Sambanova Voice Service initialized")
+    async def _get_turn_config(self):
+        """Lấy TURN configuration - sử dụng Cloudflare free"""
+        try:
+            config = await get_cloudflare_turn_credentials_async()
+            print("✅ Using Cloudflare TURN servers")
+            return config
+        except Exception as e:
+            print(f"⚠️ Cannot get TURN credentials, using None: {e}")
+            return None  # Sẽ hoạt động trên local network
     def create_response_handler(self):
         """Tạo response handler cho voice streaming"""
             gradio_chatbot = gradio_chatbot or []
             conversation_state = conversation_state or []
+            try:
+                # Speech to Text
+                text = self.model.stt(audio)
+                print("🎤 STT Result:", text)
+                if not text.strip():
+                    yield AdditionalOutputs(gradio_chatbot, conversation_state)
+                    return
+                # Thêm audio vào chatbot
+                sample_rate, array = audio
+                gradio_chatbot.append(
+                    {"role": "user", "content": f"🎤: {text}"}  # Simplified - chỉ hiển thị text
+                )
+                yield AdditionalOutputs(gradio_chatbot, conversation_state)
+                # Thêm text vào conversation state
+                conversation_state.append({"role": "user", "content": text})
+                # Gọi Sambanova API
+                print("🤖 Calling Sambanova API...")
+                request = self.client.chat.completions.create(
+                    model="Meta-Llama-3.2-3B-Instruct",
+                    messages=conversation_state,
+                    temperature=0.1,
+                    top_p=0.1,
+                )
+                response_content = {
+                    "role": "assistant",
+                    "content": request.choices[0].message.content
+                }
+                conversation_state.append(response_content)
+                gradio_chatbot.append(response_content)
+                yield AdditionalOutputs(gradio_chatbot, conversation_state)
+            except Exception as e:
+                print(f"❌ Error in response handler: {e}")
+                error_msg = {"role": "assistant", "content": f"❌ Lỗi: {str(e)}"}
+                gradio_chatbot.append(error_msg)
+                conversation_state.append(error_msg)
+                yield AdditionalOutputs(gradio_chatbot, conversation_state)
         return response
         return Stream(
             ReplyOnStopWords(
                 response_handler,
+                stop_words=["computer", "hey", "hello", "xin chào", "llama"],
                 input_sample_rate=16000,
             ),
             mode="send",
             modality="audio",
+            additional_inputs=[
+                gr.Chatbot(
+                    type="messages",
+                    value=[],
+                    label="💬 Voice Conversation",
+                    height=400
+                ),
+                gr.State(value=[])
+            ],
+            additional_outputs=[
+                gr.Chatbot(type="messages", value=[]),
+                gr.State(value=[])
+            ],
+            additional_outputs_handler=lambda chatbot, state, new_chatbot, new_state: (new_chatbot, new_state),
+            concurrency_limit=3,
+            time_limit=120,
             rtc_configuration=self.rtc_configuration,
+        )