datbkpro committed on
Commit
5c265a9
·
verified ·
1 Parent(s): 30eadbb

Update services/voice_coding_service.py

Browse files
Files changed (1) hide show
  1. services/voice_coding_service.py +179 -85
services/voice_coding_service.py CHANGED
@@ -2,24 +2,185 @@ import gradio as gr
2
  import numpy as np
3
  import base64
4
  import re
 
5
  from groq import Groq
6
- from gradio_webrtc import (
7
- WebRTC,
8
- ReplyOnStopWords,
9
  AdditionalOutputs,
10
- audio_to_bytes,
11
  get_twilio_turn_credentials,
12
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  class VoiceCodingService:
15
- """Dịch vụ Voice Coding sử dụng Groq + WebRTC"""
16
 
17
  def __init__(self, groq_client: Groq):
18
  self.groq_client = groq_client
19
- self.rtc_configuration = get_twilio_turn_credentials()
20
 
21
  # HTML templates
22
- self.spinner_html = """
 
 
 
 
 
 
 
23
  <div style="text-align: center; padding: 20px;">
24
  <div class="spinner"></div>
25
  <p>🦙 Llama đang code...</p>
@@ -40,29 +201,17 @@ class VoiceCodingService:
40
  }
41
  </style>
42
  """
43
-
44
- self.sandbox_html = """
45
- <div style="text-align: center; padding: 20px;">
46
- <h3>🎮 Sandbox Preview</h3>
47
- <p>Code sẽ được hiển thị ở đây sau khi generate</p>
48
- </div>
49
- """
50
-
51
- self.something_happened_html = """
52
- <div style="text-align: center; padding: 20px; color: #e74c3c;">
53
- <h3>❌ Có lỗi xảy ra</h3>
54
- <p>Không thể generate code. Vui lòng thử lại.</p>
55
- </div>
56
- """
57
-
58
- # Prompts
59
- self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate."
60
- self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"
61
 
62
- def extract_html_content(self, text):
63
- """Extract content including HTML tags."""
64
- match = re.search(r"<!DOCTYPE html>.*?</html>", text, re.DOTALL)
65
- return match.group(0) if match else None
 
 
 
 
 
 
66
 
67
  def display_in_sandbox(self, code):
68
  """Hiển thị code trong sandbox iframe"""
@@ -75,59 +224,4 @@ class VoiceCodingService:
75
  return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc;"></iframe>'
76
  except Exception as e:
77
  print(f"❌ Lỗi display sandbox: {e}")
78
- return self.something_happened_html
79
-
80
- def generate_code(self, user_message: tuple[int, np.ndarray], history: list[dict], code: str):
81
- """Generate code từ voice input"""
82
- yield AdditionalOutputs(history, self.spinner_html)
83
-
84
- try:
85
- # Chuyển audio sang text
86
- sr, audio = user_message
87
- audio = audio.squeeze()
88
-
89
- print("🎤 Converting speech to text...")
90
- text = self.groq_client.audio.transcriptions.create(
91
- file=("audio-file.mp3", audio_to_bytes((sr, audio))),
92
- model="whisper-large-v3-turbo",
93
- response_format="verbose_json",
94
- ).text
95
-
96
- print(f"📝 Transcription: {text}")
97
-
98
- # Format user message
99
- user_msg_formatted = self.user_prompt.format(user_message=text, code=code)
100
- history.append({"role": "user", "content": user_msg_formatted})
101
-
102
- # Generate code với Groq
103
- print("🦙 Generating code with Llama...")
104
- response = self.groq_client.chat.completions.create(
105
- model="llama-3.1-8b-instant",
106
- messages=history,
107
- temperature=1,
108
- max_tokens=2048,
109
- top_p=1,
110
- stream=False,
111
- )
112
-
113
- output = response.choices[0].message.content
114
- print("✅ Code generated successfully")
115
-
116
- # Extract HTML code
117
- try:
118
- html_code = self.extract_html_content(output)
119
- if not html_code:
120
- html_code = f"<!-- Generated Code -->\n{output}"
121
- except Exception as e:
122
- print(f"⚠️ Could not extract HTML: {e}")
123
- html_code = self.something_happened_html
124
-
125
- # Update history
126
- history.append({"role": "assistant", "content": output})
127
-
128
- yield AdditionalOutputs(history, html_code)
129
-
130
- except Exception as e:
131
- print(f"❌ Lỗi generate code: {e}")
132
- history.append({"role": "assistant", "content": f"Error: {str(e)}"})
133
- yield AdditionalOutputs(history, self.something_happened_html)
 
2
  import numpy as np
3
  import base64
4
  import re
5
+ import asyncio
6
  from groq import Groq
7
+ from fastrtc import (
8
+ Stream,
9
+ AsyncStreamHandler,
10
  AdditionalOutputs,
11
+ wait_for_item,
12
  get_twilio_turn_credentials,
13
  )
14
+ from gradio.utils import get_space
15
+
16
class VoiceCodingHandler(AsyncStreamHandler):
    """FastRTC stream handler that turns spoken requests into single-file HTML apps.

    Every incoming audio frame is processed in a background task: the audio is
    transcribed and, when the transcription contains a trigger phrase, the
    request is sent to a Groq chat model.  Progress and results are published
    on ``output_queue`` as ``AdditionalOutputs`` wrapping a dict with the keys
    ``type`` ("loading", "code_generated" or "error"), ``message``,
    ``history`` and ``code``; ``emit`` hands them out.
    """

    # Lower-case substrings that mark a transcription as addressed to the assistant.
    TRIGGER_PHRASES = ("hello llama", "xin chào llama", "llama", "code")

    def __init__(self, groq_client: Groq):
        """Create a handler bound to ``groq_client`` with an empty conversation."""
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24000,
            input_sample_rate=16000,
        )
        self.groq_client = groq_client
        self.input_queue = asyncio.Queue()
        self.output_queue = asyncio.Queue()
        self.is_active = False

        # Strong references to in-flight tasks.  The event loop only keeps weak
        # references, so an unreferenced task may be garbage-collected mid-run.
        self._background_tasks: set[asyncio.Task] = set()
        # Serializes generations so concurrent tasks cannot interleave their
        # history updates while the model call runs in a worker thread.
        self._generation_lock = asyncio.Lock()

        # Prompts
        self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate."
        self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"

        self.current_history = [{"role": "system", "content": self.system_prompt}]
        self.current_code = ""

    def copy(self):
        """Return a fresh handler sharing only the Groq client (no shared state)."""
        return VoiceCodingHandler(self.groq_client)

    def extract_html_content(self, text):
        """Return the first ``<!DOCTYPE html> ... </html>`` document in ``text``.

        Matching is case-insensitive so ``<!doctype html>`` is recognised too.
        Returns ``None`` when no complete document is present.
        """
        match = re.search(
            r"<!DOCTYPE html>.*?</html>", text, re.DOTALL | re.IGNORECASE
        )
        return match.group(0) if match else None

    async def start_up(self):
        """Mark the handler active so ``receive`` starts accepting frames."""
        self.is_active = True
        print("✅ Voice Coding Handler started")

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Accept one ``(sample_rate, samples)`` frame and process it in the background.

        Frames arriving while the handler is inactive are dropped.
        """
        if not self.is_active:
            return

        sample_rate, array = frame
        array = array.squeeze()

        # Keep a reference until the task finishes (see ``_background_tasks``).
        task = asyncio.create_task(self._process_audio(array, sample_rate))
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)

    async def _publish(self, payload_type: str, message: str, code: str) -> None:
        """Queue one status payload for ``emit``.

        The history is copied so later turns do not mutate a payload that is
        already sitting in the queue or held by a consumer.
        """
        await self.output_queue.put(AdditionalOutputs({
            "type": payload_type,
            "message": message,
            "history": list(self.current_history),
            "code": code,
        }))

    async def _process_audio(self, audio_data: np.ndarray, sample_rate: int):
        """Transcribe ``audio_data`` and generate code if a trigger phrase is heard.

        Errors are logged and swallowed so a bad frame cannot break the stream.
        """
        try:
            print("🎤 Processing audio for voice coding...")

            transcription = await self._transcribe_audio(audio_data, sample_rate)

            if transcription and self._is_trigger_phrase(transcription):
                print(f"🎯 Trigger phrase detected: {transcription}")

                # Tell the UI that generation has started.
                await self._publish(
                    "loading", "🦙 Llama đang code...", self.current_code
                )

                await self._generate_code(transcription)

        except Exception as e:
            print(f"❌ Lỗi xử lý audio: {e}")

    async def _transcribe_audio(self, audio_data: np.ndarray, sample_rate: int) -> str:
        """Convert audio to text — placeholder implementation.

        TODO: replace with a real speech-to-text call.  The audio arguments
        are currently ignored and a fixed test sentence is returned.
        NOTE(review): that sentence contains none of ``TRIGGER_PHRASES``, so
        code generation is never triggered until this stub is replaced.
        """
        try:
            return "Tạo trang web hello world"
        except Exception as e:
            print(f"❌ Lỗi transcription: {e}")
            return ""

    def _is_trigger_phrase(self, text: str) -> bool:
        """Return True when ``text`` contains any trigger phrase (case-insensitive)."""
        text_lower = text.lower()
        return any(phrase in text_lower for phrase in self.TRIGGER_PHRASES)

    async def _generate_code(self, user_message: str):
        """Ask the Groq model for an HTML app fulfilling ``user_message``.

        On success the conversation history and ``current_code`` are updated
        and a "code_generated" payload is queued; on failure an "error"
        payload carrying the previous code is queued instead.
        """
        try:
            async with self._generation_lock:
                user_msg_formatted = self.user_prompt.format(
                    user_message=user_message,
                    code=self.current_code,
                )
                self.current_history.append(
                    {"role": "user", "content": user_msg_formatted}
                )

                print("🦙 Generating code with Llama...")
                # The Groq client call is synchronous; run it in a worker
                # thread so the event loop keeps servicing audio frames.
                response = await asyncio.to_thread(
                    self.groq_client.chat.completions.create,
                    model="llama-3.3-70b-versatile",
                    messages=list(self.current_history),
                    temperature=1,
                    max_tokens=2048,
                    top_p=1,
                    stream=False,
                )

                output = response.choices[0].message.content
                print("✅ Code generated successfully")

                # Fall back to the raw model output when no full document is found.
                html_code = self.extract_html_content(output)
                if not html_code:
                    html_code = f"<!-- Generated Code -->\n{output}"

                self.current_history.append({"role": "assistant", "content": output})
                self.current_code = html_code

                await self._publish(
                    "code_generated", "✅ Code đã được generate!", html_code
                )

        except Exception as e:
            print(f"❌ Lỗi generate code: {e}")
            await self._publish("error", f"❌ Lỗi: {str(e)}", self.current_code)

    async def emit(self):
        """Return the next queued output via ``wait_for_item``, or ``None`` on error."""
        try:
            return await wait_for_item(self.output_queue)
        except Exception as e:
            print(f"❌ Lỗi emit: {e}")
            return None

    async def shutdown(self):
        """Deactivate the handler and cancel any audio tasks still running."""
        self.is_active = False
        for task in list(self._background_tasks):
            task.cancel()
        print("🛑 Voice Coding Handler stopped")
167
 
168
  class VoiceCodingService:
169
+ """Dịch vụ Voice Coding sử dụng FastRTC"""
170
 
171
  def __init__(self, groq_client: Groq):
172
  self.groq_client = groq_client
173
+ self.rtc_configuration = get_twilio_turn_credentials() if get_space() else None
174
 
175
  # HTML templates
176
+ self.sandbox_html = """
177
+ <div style="text-align: center; padding: 20px;">
178
+ <h3>🎮 Sandbox Preview</h3>
179
+ <p>Code sẽ được hiển thị ở đây sau khi generate</p>
180
+ </div>
181
+ """
182
+
183
+ self.loading_html = """
184
  <div style="text-align: center; padding: 20px;">
185
  <div class="spinner"></div>
186
  <p>🦙 Llama đang code...</p>
 
201
  }
202
  </style>
203
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
def create_stream(self):
    """Build the FastRTC audio stream served by a new VoiceCodingHandler.

    The stream is bidirectional audio ("send-receive") and uses this
    service's ``rtc_configuration``.  A concurrency limit of 5 and a time
    limit of 90 are applied only when ``get_space()`` is truthy
    (presumably when running on Hugging Face Spaces — confirm against
    gradio's ``get_space``); otherwise both are left as ``None``.
    """
    handler = VoiceCodingHandler(self.groq_client)
    on_space = get_space()

    if on_space:
        max_concurrency, max_seconds = 5, 90
    else:
        max_concurrency, max_seconds = None, None

    return Stream(
        handler,
        modality="audio",
        mode="send-receive",
        rtc_configuration=self.rtc_configuration,
        concurrency_limit=max_concurrency,
        time_limit=max_seconds,
    )
215
 
216
  def display_in_sandbox(self, code):
217
  """Hiển thị code trong sandbox iframe"""
 
224
  return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc;"></iframe>'
225
  except Exception as e:
226
  print(f"❌ Lỗi display sandbox: {e}")
227
+ return f'<div style="color: red;">Lỗi hiển thị sandbox: {str(e)}</div>'