Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed on Nov 12, 2025

Commit

5c265a9

verified ·

1 Parent(s): 30eadbb

Update services/voice_coding_service.py

Browse files

Files changed (1) hide show

services/voice_coding_service.py +179 -85

services/voice_coding_service.py CHANGED Viewed

@@ -2,24 +2,185 @@ import gradio as gr
 import numpy as np
 import base64
 import re
 from groq import Groq
-from gradio_webrtc import (
-    WebRTC,
-    ReplyOnStopWords,
     AdditionalOutputs,
-    audio_to_bytes,
     get_twilio_turn_credentials,
 )
 class VoiceCodingService:
-    """Dịch vụ Voice Coding sử dụng Groq + WebRTC"""
     def __init__(self, groq_client: Groq):
         self.groq_client = groq_client
-        self.rtc_configuration = get_twilio_turn_credentials()
         # HTML templates
-        self.spinner_html = """
         <div style="text-align: center; padding: 20px;">
             <div class="spinner"></div>
             <p>🦙 Llama đang code...</p>
@@ -40,29 +201,17 @@ class VoiceCodingService:
         }
         </style>
         """
-        self.sandbox_html = """
-        <div style="text-align: center; padding: 20px;">
-            <h3>🎮 Sandbox Preview</h3>
-            <p>Code sẽ được hiển thị ở đây sau khi generate</p>
-        </div>
-        """
-        self.something_happened_html = """
-        <div style="text-align: center; padding: 20px; color: #e74c3c;">
-            <h3>❌ Có lỗi xảy ra</h3>
-            <p>Không thể generate code. Vui lòng thử lại.</p>
-        </div>
-        """
-        # Prompts
-        self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate."
-        self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"
-    def extract_html_content(self, text):
-        """Extract content including HTML tags."""
-        match = re.search(r"<!DOCTYPE html>.*?</html>", text, re.DOTALL)
-        return match.group(0) if match else None
     def display_in_sandbox(self, code):
         """Hiển thị code trong sandbox iframe"""
@@ -75,59 +224,4 @@ class VoiceCodingService:
             return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc;"></iframe>'
         except Exception as e:
             print(f"❌ Lỗi display sandbox: {e}")
-            return self.something_happened_html
-    def generate_code(self, user_message: tuple[int, np.ndarray], history: list[dict], code: str):
-        """Generate code từ voice input"""
-        yield AdditionalOutputs(history, self.spinner_html)
-        try:
-            # Chuyển audio sang text
-            sr, audio = user_message
-            audio = audio.squeeze()
-            print("🎤 Converting speech to text...")
-            text = self.groq_client.audio.transcriptions.create(
-                file=("audio-file.mp3", audio_to_bytes((sr, audio))),
-                model="whisper-large-v3-turbo",
-                response_format="verbose_json",
-            ).text
-            print(f"📝 Transcription: {text}")
-            # Format user message
-            user_msg_formatted = self.user_prompt.format(user_message=text, code=code)
-            history.append({"role": "user", "content": user_msg_formatted})
-            # Generate code với Groq
-            print("🦙 Generating code with Llama...")
-            response = self.groq_client.chat.completions.create(
-                model="llama-3.1-8b-instant",
-                messages=history,
-                temperature=1,
-                max_tokens=2048,
-                top_p=1,
-                stream=False,
-            )
-            output = response.choices[0].message.content
-            print("✅ Code generated successfully")
-            # Extract HTML code
-            try:
-                html_code = self.extract_html_content(output)
-                if not html_code:
-                    html_code = f"<!-- Generated Code -->\n{output}"
-            except Exception as e:
-                print(f"⚠️ Could not extract HTML: {e}")
-                html_code = self.something_happened_html
-            # Update history
-            history.append({"role": "assistant", "content": output})
-            yield AdditionalOutputs(history, html_code)
-        except Exception as e:
-            print(f"❌ Lỗi generate code: {e}")
-            history.append({"role": "assistant", "content": f"Error: {str(e)}"})
-            yield AdditionalOutputs(history, self.something_happened_html)

 import numpy as np
 import base64
 import re
+import asyncio
 from groq import Groq
+from fastrtc import (
+    Stream,
+    AsyncStreamHandler,
     AdditionalOutputs,
+    wait_for_item,
     get_twilio_turn_credentials,
 )
+from gradio.utils import get_space
+class VoiceCodingHandler(AsyncStreamHandler):
+    """FastRTC Handler cho Voice Coding"""
+    def __init__(self, groq_client: Groq):
+        super().__init__(
+            expected_layout="mono",
+            output_sample_rate=24000,
+            input_sample_rate=16000,
+        )
+        self.groq_client = groq_client
+        self.input_queue = asyncio.Queue()
+        self.output_queue = asyncio.Queue()
+        self.is_active = False
+        # Prompts
+        self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate."
+        self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"
+        self.current_history = [{"role": "system", "content": self.system_prompt}]
+        self.current_code = ""
+    def copy(self):
+        return VoiceCodingHandler(self.groq_client)
+    def extract_html_content(self, text):
+        """Extract content including HTML tags."""
+        match = re.search(r"<!DOCTYPE html>.*?</html>", text, re.DOTALL)
+        return match.group(0) if match else None
+    async def start_up(self):
+        """Khởi động handler"""
+        self.is_active = True
+        print("✅ Voice Coding Handler started")
+    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        """Nhận audio frame"""
+        if not self.is_active:
+            return
+        sample_rate, array = frame
+        array = array.squeeze()
+        # Xử lý audio trong background
+        asyncio.create_task(self._process_audio(array, sample_rate))
+    async def _process_audio(self, audio_data: np.ndarray, sample_rate: int):
+        """Xử lý audio và generate code"""
+        try:
+            print("🎤 Processing audio for voice coding...")
+            # Ở đây chúng ta cần chuyển audio sang text
+            # Tạm thời sử dụng transcription service có sẵn
+            transcription = await self._transcribe_audio(audio_data, sample_rate)
+            if transcription and self._is_trigger_phrase(transcription):
+                print(f"🎯 Trigger phrase detected: {transcription}")
+                # Generate loading state
+                await self.output_queue.put(AdditionalOutputs({
+                    "type": "loading",
+                    "message": "🦙 Llama đang code...",
+                    "history": self.current_history,
+                    "code": self.current_code
+                }))
+                # Generate code
+                await self._generate_code(transcription)
+        except Exception as e:
+            print(f"❌ Lỗi xử lý audio: {e}")
+    async def _transcribe_audio(self, audio_data: np.ndarray, sample_rate: int) -> str:
+        """Chuyển audio sang text - simplified version"""
+        try:
+            # Sử dụng transcription service có sẵn từ hệ thống của bạn
+            # Hoặc implement Whisper local
+            return "Tạo trang web hello world"  # Tạm thời return test text
+        except Exception as e:
+            print(f"❌ Lỗi transcription: {e}")
+            return ""
+    def _is_trigger_phrase(self, text: str) -> bool:
+        """Kiểm tra trigger phrase"""
+        trigger_phrases = ["hello llama", "xin chào llama", "llama", "code"]
+        text_lower = text.lower()
+        return any(phrase in text_lower for phrase in trigger_phrases)
+    async def _generate_code(self, user_message: str):
+        """Generate code từ text input"""
+        try:
+            # Format user message
+            user_msg_formatted = self.user_prompt.format(
+                user_message=user_message,
+                code=self.current_code
+            )
+            # Update history
+            self.current_history.append({"role": "user", "content": user_msg_formatted})
+            # Generate code với Groq
+            print("🦙 Generating code with Llama...")
+            response = self.groq_client.chat.completions.create(
+                model="llama-3.3-70b-versatile",
+                messages=self.current_history,
+                temperature=1,
+                max_tokens=2048,
+                top_p=1,
+                stream=False,
+            )
+            output = response.choices[0].message.content
+            print("✅ Code generated successfully")
+            # Extract HTML code
+            html_code = self.extract_html_content(output)
+            if not html_code:
+                html_code = f"<!-- Generated Code -->\n{output}"
+            # Update state
+            self.current_history.append({"role": "assistant", "content": output})
+            self.current_code = html_code
+            # Send result
+            await self.output_queue.put(AdditionalOutputs({
+                "type": "code_generated",
+                "history": self.current_history,
+                "code": html_code,
+                "message": "✅ Code đã được generate!"
+            }))
+        except Exception as e:
+            print(f"❌ Lỗi generate code: {e}")
+            await self.output_queue.put(AdditionalOutputs({
+                "type": "error",
+                "message": f"❌ Lỗi: {str(e)}",
+                "history": self.current_history,
+                "code": self.current_code
+            }))
+    async def emit(self):
+        """Emit outputs"""
+        try:
+            return await wait_for_item(self.output_queue)
+        except Exception as e:
+            print(f"❌ Lỗi emit: {e}")
+            return None
+    async def shutdown(self):
+        """Dừng handler"""
+        self.is_active = False
+        print("🛑 Voice Coding Handler stopped")
 class VoiceCodingService:
+    """Dịch vụ Voice Coding sử dụng FastRTC"""
     def __init__(self, groq_client: Groq):
         self.groq_client = groq_client
+        self.rtc_configuration = get_twilio_turn_credentials() if get_space() else None
         # HTML templates
+        self.sandbox_html = """
+        <div style="text-align: center; padding: 20px;">
+            <h3>🎮 Sandbox Preview</h3>
+            <p>Code sẽ được hiển thị ở đây sau khi generate</p>
+        </div>
+        """
+        self.loading_html = """
         <div style="text-align: center; padding: 20px;">
             <div class="spinner"></div>
             <p>🦙 Llama đang code...</p>
         }
         </style>
         """
+    def create_stream(self):
+        """Tạo FastRTC stream"""
+        return Stream(
+            VoiceCodingHandler(self.groq_client),
+            modality="audio",
+            mode="send-receive",
+            rtc_configuration=self.rtc_configuration,
+            concurrency_limit=5 if get_space() else None,
+            time_limit=90 if get_space() else None,
+        )
     def display_in_sandbox(self, code):
         """Hiển thị code trong sandbox iframe"""
             return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc;"></iframe>'
         except Exception as e:
             print(f"❌ Lỗi display sandbox: {e}")
+            return f'<div style="color: red;">Lỗi hiển thị sandbox: {str(e)}</div>'