datbkpro commited on
Commit
5d2bcc5
·
verified ·
1 Parent(s): d3c4249

Update services/openai_realtime_service.py

Browse files
Files changed (1) hide show
  1. services/openai_realtime_service.py +226 -0
services/openai_realtime_service.py CHANGED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import json
4
+ from pathlib import Path
5
+ import gradio as gr
6
+ import numpy as np
7
+ import openai
8
+ from dotenv import load_dotenv
9
+ from fastapi import FastAPI
10
+ from fastapi.responses import HTMLResponse, StreamingResponse
11
+
12
+ # Thêm vào imports hiện tại
13
+ from fastrtc import (
14
+ AdditionalOutputs,
15
+ AsyncStreamHandler,
16
+ Stream,
17
+ get_twilio_turn_credentials,
18
+ wait_for_item,
19
+ )
20
+ from gradio.utils import get_space
21
+
22
+ class OpenAIRealtimeService:
23
+ """Dịch vụ OpenAI Realtime API cho streaming chất lượng cao"""
24
+
25
+ def __init__(self, api_key: str):
26
+ self.client = openai.AsyncOpenAI(api_key=api_key)
27
+ self.connection = None
28
+ self.is_active = False
29
+
30
+ async def start_session(self):
31
+ """Bắt đầu session OpenAI Realtime"""
32
+ try:
33
+ self.connection = await self.client.beta.realtime.connect(
34
+ model="gpt-4o-mini-realtime-preview-2024-12-17"
35
+ )
36
+
37
+ # Cấu hình session
38
+ await self.connection.session.update(
39
+ session={
40
+ "turn_detection": {"type": "server_vad"},
41
+ "input_audio_transcription": {
42
+ "model": "whisper-1",
43
+ "language": "vi", # Hỗ trợ tiếng Việt
44
+ },
45
+ }
46
+ )
47
+
48
+ self.is_active = True
49
+ print("✅ OpenAI Realtime session started")
50
+ return True
51
+
52
+ except Exception as e:
53
+ print(f"❌ Lỗi khởi động OpenAI Realtime: {e}")
54
+ return False
55
+
56
+ async def process_audio_chunk(self, audio_chunk: np.ndarray, sample_rate: int = 24000):
57
+ """Xử lý audio chunk với OpenAI Realtime API"""
58
+ if not self.connection or not self.is_active:
59
+ return None
60
+
61
+ try:
62
+ # Chuẩn hóa audio
63
+ if sample_rate != 24000:
64
+ audio_chunk = self._resample_audio(audio_chunk, sample_rate, 24000)
65
+
66
+ # Encode audio
67
+ audio_message = base64.b64encode(audio_chunk.tobytes()).decode("utf-8")
68
+
69
+ # Gửi đến OpenAI
70
+ await self.connection.input_audio_buffer.append(audio=audio_message)
71
+
72
+ except Exception as e:
73
+ print(f"❌ Lỗi xử lý audio với OpenAI: {e}")
74
+
75
+ async def get_responses(self):
76
+ """Lấy responses từ OpenAI Realtime API"""
77
+ if not self.connection:
78
+ return
79
+
80
+ async for event in self.connection:
81
+ if event.type == "input_audio_buffer.speech_started":
82
+ yield {"type": "speech_started"}
83
+
84
+ elif event.type == "conversation.item.input_audio_transcription.completed":
85
+ yield {
86
+ "type": "user_transcription",
87
+ "content": event.transcript,
88
+ "role": "user"
89
+ }
90
+
91
+ elif event.type == "response.audio_transcript.done":
92
+ yield {
93
+ "type": "assistant_transcription",
94
+ "content": event.transcript,
95
+ "role": "assistant"
96
+ }
97
+
98
+ elif event.type == "response.audio.delta":
99
+ audio_data = np.frombuffer(
100
+ base64.b64decode(event.delta), dtype=np.int16
101
+ )
102
+ yield {
103
+ "type": "audio_delta",
104
+ "audio": audio_data,
105
+ "sample_rate": 24000
106
+ }
107
+
108
+ async def close(self):
109
+ """Đóng kết nối"""
110
+ if self.connection:
111
+ await self.connection.close()
112
+ self.is_active = False
113
+ print("🛑 OpenAI Realtime session closed")
114
+
115
+ class HybridStreamingService:
116
+ """Service kết hợp VOSK local và OpenAI Realtime"""
117
+
118
+ def __init__(self, groq_client, rag_system, tts_service, openai_key: str = None):
119
+ self.groq_client = groq_client
120
+ self.rag_system = rag_system
121
+ self.tts_service = tts_service
122
+
123
+ # Local ASR với VOSK
124
+ self.vosk_asr = VoskStreamingASR()
125
+ self.vad_processor = SileroVAD()
126
+
127
+ # OpenAI Realtime API
128
+ self.openai_service = None
129
+ if openai_key:
130
+ self.openai_service = OpenAIRealtimeService(openai_key)
131
+
132
+ self.current_mode = "local" # 'local' hoặc 'openai'
133
+ self.is_listening = False
134
+
135
+ async def start_listening(self, speech_callback: Callable, mode: str = "auto"):
136
+ """Bắt đầu lắng nghe với mode lựa chọn"""
137
+ self.current_callback = speech_callback
138
+
139
+ if mode == "openai" and self.openai_service:
140
+ return await self._start_openai_mode()
141
+ else:
142
+ return self._start_local_mode()
143
+
144
+ async def _start_openai_mode(self):
145
+ """Khởi động chế độ OpenAI Realtime"""
146
+ try:
147
+ success = await self.openai_service.start_session()
148
+ if success:
149
+ self.is_listening = True
150
+ self.current_mode = "openai"
151
+
152
+ # Khởi động background task để nhận responses
153
+ asyncio.create_task(self._openai_response_handler())
154
+
155
+ if self.current_callback:
156
+ self.current_callback({
157
+ 'transcription': "Đã bắt đầu với OpenAI Realtime...",
158
+ 'response': "",
159
+ 'tts_audio': None,
160
+ 'status': 'openai_listening'
161
+ })
162
+
163
+ return True
164
+ return False
165
+
166
+ except Exception as e:
167
+ print(f"❌ Lỗi khởi động OpenAI mode: {e}")
168
+ return False
169
+
170
+ def _start_local_mode(self):
171
+ """Khởi động chế độ local VOSK"""
172
+ try:
173
+ if self.vosk_asr.start_stream() and self.vad_processor.start_stream(self._on_speech_detected):
174
+ self.is_listening = True
175
+ self.current_mode = "local"
176
+
177
+ # Khởi động worker threads
178
+ self._start_worker_threads()
179
+
180
+ if self.current_callback:
181
+ self.current_callback({
182
+ 'transcription': "Đã bắt đầu với VOSK local...",
183
+ 'response': "",
184
+ 'tts_audio': None,
185
+ 'status': 'local_listening'
186
+ })
187
+
188
+ return True
189
+ return False
190
+
191
+ except Exception as e:
192
+ print(f"❌ Lỗi khởi động local mode: {e}")
193
+ return False
194
+
195
+ async def _openai_response_handler(self):
196
+ """Xử lý responses từ OpenAI Realtime"""
197
+ try:
198
+ async for response in self.openai_service.get_responses():
199
+ if response['type'] == 'user_transcription' and self.current_callback:
200
+ self.current_callback({
201
+ 'transcription': response['content'],
202
+ 'response': "Đang xử lý...",
203
+ 'tts_audio': None,
204
+ 'status': 'processing'
205
+ })
206
+
207
+ elif response['type'] == 'assistant_transcription' and self.current_callback:
208
+ self.current_callback({
209
+ 'transcription': "", # Giữ transcription cũ
210
+ 'response': response['content'],
211
+ 'tts_audio': None,
212
+ 'status': 'completed'
213
+ })
214
+
215
+ elif response['type'] == 'audio_delta' and self.current_callback:
216
+ # Xử lý audio real-time từ OpenAI
217
+ audio_path = self._save_temp_audio(response['audio'], response['sample_rate'])
218
+ self.current_callback({
219
+ 'transcription': "",
220
+ 'response': "",
221
+ 'tts_audio': audio_path,
222
+ 'status': 'audio_streaming'
223
+ })
224
+
225
+ except Exception as e:
226
+ print(f"❌ Lỗi OpenAI response handler: {e}")