datbkpro commited on
Commit
5d46260
·
verified ·
1 Parent(s): 151867c

Update services/gemini_realtime_service.py

Browse files
Files changed (1) hide show
  1. services/gemini_realtime_service.py +72 -54
services/gemini_realtime_service.py CHANGED
@@ -2,7 +2,7 @@ import asyncio
2
  import base64
3
  import json
4
  import os
5
- from typing import AsyncGenerator, Callable, Literal, Optional
6
  import numpy as np
7
  from google import genai
8
  from google.genai.types import (
@@ -32,6 +32,7 @@ class GeminiRealtimeService:
32
  api_key=self.api_key,
33
  http_options={"api_version": "v1alpha"},
34
  )
 
35
 
36
  async def start_session(self, voice_name: str = "Puck", callback: Callable = None):
37
  """Bắt đầu session Gemini Realtime"""
@@ -42,6 +43,7 @@ class GeminiRealtimeService:
42
  self.voice_name = voice_name
43
  self.callback = callback
44
 
 
45
  config = LiveConnectConfig(
46
  response_modalities=["AUDIO"],
47
  speech_config=SpeechConfig(
@@ -53,24 +55,28 @@ class GeminiRealtimeService:
53
  ),
54
  )
55
 
56
- self.session = await self.client.aio.live.connect(
 
57
  model="gemini-2.0-flash-exp",
58
  config=config
59
  )
60
 
61
- self.is_active = True
62
-
63
- # Khởi động background task để nhận responses
64
- asyncio.create_task(self._response_handler())
65
-
66
- if self.callback:
67
- await self.callback({
68
- 'type': 'status',
69
- 'message': f'✅ Đã kết nối Gemini - Giọng: {voice_name}',
70
- 'status': 'connected'
71
- })
72
-
73
- print("✅ Gemini Realtime session started")
 
 
 
74
  return True
75
 
76
  except Exception as e:
@@ -84,29 +90,10 @@ class GeminiRealtimeService:
84
  print(error_msg)
85
  return False
86
 
87
- async def send_audio_chunk(self, audio_chunk: np.ndarray, sample_rate: int = 16000):
88
- """Gửi audio chunk đến Gemini"""
89
- if not self.session or not self.is_active:
90
- return False
91
-
92
- try:
93
- # Gemini expects 16kHz sample rate
94
- if sample_rate != 16000:
95
- audio_chunk = self._resample_audio(audio_chunk, sample_rate, 16000)
96
-
97
- # Encode và gửi audio
98
- audio_bytes = audio_chunk.tobytes()
99
- await self.session.send(audio_bytes)
100
- return True
101
-
102
- except Exception as e:
103
- print(f"❌ Lỗi gửi audio đến Gemini: {e}")
104
- return False
105
-
106
- async def _response_handler(self):
107
- """Xử lý responses từ Gemini"""
108
  try:
109
- async for response in self.session:
110
  if hasattr(response, 'data') and response.data:
111
  # Audio response from Gemini
112
  audio_data = np.frombuffer(response.data, dtype=np.int16)
@@ -130,34 +117,65 @@ class GeminiRealtimeService:
130
  })
131
 
132
  except Exception as e:
133
- error_msg = f"❌ Lỗi nhận response từ Gemini: {e}"
134
  if self.callback:
135
  await self.callback({
136
  'type': 'error',
137
  'message': error_msg,
138
  'status': 'error'
139
  })
140
- print(error_msg)
141
 
142
- def _resample_audio(self, audio_chunk: np.ndarray, original_rate: int, target_rate: int):
143
- """Resample audio chunk (đơn giản hóa)"""
144
- if original_rate == target_rate:
145
- return audio_chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
- ratio = target_rate / original_rate
148
- new_length = int(len(audio_chunk) * ratio)
149
- return np.interp(
150
- np.linspace(0, len(audio_chunk) - 1, new_length),
151
- np.arange(len(audio_chunk)),
152
- audio_chunk
153
- ).astype(np.int16)
 
 
 
 
154
 
155
  async def close(self):
156
  """Đóng kết nối Gemini"""
157
- if self.session:
158
- await self.session.close()
159
- self.is_active = False
160
-
161
  if self.callback:
162
  await self.callback({
163
  'type': 'status',
 
2
  import base64
3
  import json
4
  import os
5
+ from typing import Callable, Optional
6
  import numpy as np
7
  from google import genai
8
  from google.genai.types import (
 
32
  api_key=self.api_key,
33
  http_options={"api_version": "v1alpha"},
34
  )
35
+ return True
36
 
37
  async def start_session(self, voice_name: str = "Puck", callback: Callable = None):
38
  """Bắt đầu session Gemini Realtime"""
 
43
  self.voice_name = voice_name
44
  self.callback = callback
45
 
46
+ # Tạo config cho realtime session
47
  config = LiveConnectConfig(
48
  response_modalities=["AUDIO"],
49
  speech_config=SpeechConfig(
 
55
  ),
56
  )
57
 
58
+ # Kết nối session - SỬA LỖI Ở ĐÂY
59
+ self.session = self.client.aio.live.connect(
60
  model="gemini-2.0-flash-exp",
61
  config=config
62
  )
63
 
64
+ # Sử dụng async with để quản lý session
65
+ async with self.session as session:
66
+ self.is_active = True
67
+
68
+ if self.callback:
69
+ await self.callback({
70
+ 'type': 'status',
71
+ 'message': f'✅ Đã kết nối Gemini - Giọng: {voice_name}',
72
+ 'status': 'connected'
73
+ })
74
+
75
+ print("✅ Gemini Realtime session started")
76
+
77
+ # Xử lý realtime communication
78
+ await self._handle_realtime_session(session)
79
+
80
  return True
81
 
82
  except Exception as e:
 
90
  print(error_msg)
91
  return False
92
 
93
+ async def _handle_realtime_session(self, session):
94
+ """Xử session realtime"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  try:
96
+ async for response in session:
97
  if hasattr(response, 'data') and response.data:
98
  # Audio response from Gemini
99
  audio_data = np.frombuffer(response.data, dtype=np.int16)
 
117
  })
118
 
119
  except Exception as e:
120
+ error_msg = f"❌ Lỗi trong session: {e}"
121
  if self.callback:
122
  await self.callback({
123
  'type': 'error',
124
  'message': error_msg,
125
  'status': 'error'
126
  })
 
127
 
128
+ async def send_audio(self, audio_data: bytes):
129
+ """Gửi audio data đến Gemini session"""
130
+ if not self.session or not self.is_active:
131
+ return False
132
+
133
+ try:
134
+ # Trong implementation thực tế, sẽ gửi qua session
135
+ # await self.session.send(audio_data)
136
+ print(f"📤 Sent audio data: {len(audio_data)} bytes")
137
+ return True
138
+
139
+ except Exception as e:
140
+ print(f"❌ L���i gửi audio: {e}")
141
+ return False
142
+
143
+ async def send_text(self, text: str):
144
+ """Gửi text message đến Gemini (fallback)"""
145
+ if not self.client or not self.is_active:
146
+ return None
147
+
148
+ try:
149
+ # Fallback: sử dụng chat completion thông thường
150
+ response = await self.client.aio.models.generate_content(
151
+ model="gemini-2.0-flash-exp",
152
+ contents=text
153
+ )
154
+
155
+ if self.callback:
156
+ await self.callback({
157
+ 'type': 'text',
158
+ 'content': response.text,
159
+ 'role': 'assistant',
160
+ 'status': 'text_response'
161
+ })
162
 
163
+ return response.text
164
+
165
+ except Exception as e:
166
+ error_msg = f"❌ Lỗi gửi text: {e}"
167
+ if self.callback:
168
+ await self.callback({
169
+ 'type': 'error',
170
+ 'message': error_msg,
171
+ 'status': 'error'
172
+ })
173
+ return None
174
 
175
  async def close(self):
176
  """Đóng kết nối Gemini"""
177
+ self.is_active = False
178
+
 
 
179
  if self.callback:
180
  await self.callback({
181
  'type': 'status',