Update ui/tabs.py
Browse files- ui/tabs.py +132 -2
ui/tabs.py
CHANGED
|
@@ -7,6 +7,7 @@ from services.audio_service import AudioService
|
|
| 7 |
from services.chat_service import ChatService
|
| 8 |
from services.image_service import ImageService
|
| 9 |
from services.streaming_voice_service import StreamingVoiceService
|
|
|
|
| 10 |
from services.stream_object_detection_service import StreamObjectDetection
|
| 11 |
from core.rag_system import EnhancedRAGSystem
|
| 12 |
from core.tts_service import EnhancedTTSService
|
|
@@ -17,11 +18,13 @@ def create_all_tabs(audio_service: AudioService, chat_service: ChatService,
|
|
| 17 |
image_service: ImageService, rag_system: EnhancedRAGSystem,
|
| 18 |
tts_service: EnhancedTTSService, wikipedia_processor: WikipediaProcessor,
|
| 19 |
streaming_voice_service: StreamingVoiceService,
|
|
|
|
| 20 |
):
|
| 21 |
|
| 22 |
with gr.Tab("🎙️ Streaming Voice (VAD)"):
|
| 23 |
create_streaming_voice_tab(streaming_voice_service)
|
| 24 |
-
|
|
|
|
| 25 |
with gr.Tab("🎙️ Audio"):
|
| 26 |
create_audio_tab(audio_service)
|
| 27 |
|
|
@@ -40,7 +43,134 @@ def create_all_tabs(audio_service: AudioService, chat_service: ChatService,
|
|
| 40 |
with gr.Tab("🌐 Language Info"): # NEW TAB
|
| 41 |
create_language_info_tab(rag_system.multilingual_manager)
|
| 42 |
with gr.Tab("Stream Object Detection"):
|
| 43 |
-
create_streaming_object_detection()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
def create_streaming_object_detection():
|
| 45 |
with gr.Blocks() as object_detection_tab:
|
| 46 |
gr.HTML(
|
|
|
|
| 7 |
from services.chat_service import ChatService
|
| 8 |
from services.image_service import ImageService
|
| 9 |
from services.streaming_voice_service import StreamingVoiceService
|
| 10 |
+
from services.openai_realtime_service import HybridStreamingService
|
| 11 |
from services.stream_object_detection_service import StreamObjectDetection
|
| 12 |
from core.rag_system import EnhancedRAGSystem
|
| 13 |
from core.tts_service import EnhancedTTSService
|
|
|
|
| 18 |
image_service: ImageService, rag_system: EnhancedRAGSystem,
|
| 19 |
tts_service: EnhancedTTSService, wikipedia_processor: WikipediaProcessor,
|
| 20 |
streaming_voice_service: StreamingVoiceService,
|
| 21 |
+
hybrid_service: HybridStreamingService
|
| 22 |
):
|
| 23 |
|
| 24 |
with gr.Tab("🎙️ Streaming Voice (VAD)"):
|
| 25 |
create_streaming_voice_tab(streaming_voice_service)
|
| 26 |
+
with gr.Tab("Stream Object Detection"):
|
| 27 |
+
create_openai_realtime_tab()
|
| 28 |
with gr.Tab("🎙️ Audio"):
|
| 29 |
create_audio_tab(audio_service)
|
| 30 |
|
|
|
|
| 43 |
with gr.Tab("🌐 Language Info"): # NEW TAB
|
| 44 |
create_language_info_tab(rag_system.multilingual_manager)
|
| 45 |
with gr.Tab("Stream Object Detection"):
|
| 46 |
+
create_streaming_object_detection(hybrid_service)
|
| 47 |
+
|
| 48 |
+
def create_openai_realtime_tab(hybrid_service: HybridStreamingService):
    """Build the Gradio UI for the OpenAI Realtime streaming tab.

    Lays out a control column (mode selector, start/stop buttons, status box,
    OpenAI settings) next to a conversation column (chatbot, transcription,
    audio output) and wires the start/stop buttons to ``hybrid_service``.

    Args:
        hybrid_service: Service driven by the buttons. ``start_listening`` is
            awaited on start, ``stop_listening`` is called on stop, and its
            ``openai_service`` attribute is filled in from the API-key textbox
            the first time a key is supplied.

    Returns:
        The ``gr.Blocks`` container holding the tab's components.
    """
    with gr.Blocks() as openai_tab:
        gr.Markdown("## 🤖 OpenAI Realtime API - Streaming Chất Lượng Cao")

        with gr.Row():
            with gr.Column(scale=1):
                # Recognition mode selection.
                mode_selector = gr.Radio(
                    choices=["local", "openai", "auto"],
                    value="auto",
                    label="Chế độ nhận diện",
                    info="Local: VOSK (nhanh), OpenAI: Chất lượng cao",
                )

                start_btn = gr.Button("🎙️ Bắt đầu Streaming", variant="primary")
                stop_btn = gr.Button("🛑 Dừng", variant="secondary")

                status_display = gr.Textbox(
                    label="Trạng thái",
                    value="Chưa kết nối",
                    interactive=False,
                )

                # OpenAI-specific controls.
                with gr.Accordion("⚙️ Cài đặt OpenAI", open=False):
                    api_key = gr.Textbox(
                        label="OpenAI API Key",
                        type="password",
                        placeholder="Nhập API key...",
                        info="Cần cho chế độ OpenAI Realtime",
                    )

                    language_select = gr.Dropdown(
                        choices=["vi", "en", "fr", "es", "de", "ja", "zh"],
                        value="vi",
                        label="Ngôn ngữ",
                    )

            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    label="💬 Hội thoại",
                    type="messages",
                    height=400,
                )

                # NOTE(review): no handler in this function writes to the two
                # components below; presumably they are meant to be fed by the
                # streaming callbacks — confirm.
                transcription_display = gr.Textbox(
                    label="🎤 Bạn nói",
                    interactive=False,
                    lines=2,
                )

                audio_output = gr.Audio(
                    label="🔊 Phản hồi AI",
                    interactive=False,
                    autoplay=True,
                )

        # Connection flag shared between the start/stop handlers.
        connection_state = gr.State(value=False)

        async def start_streaming(mode, api_key_value, language, history):
            """Start streaming in the selected mode.

            Returns ``(connected, status_text, history)`` matching the
            ``outputs`` of ``start_btn.click``. ``language`` is received from
            the dropdown but is not forwarded to the service here.
            """
            try:
                # Create the OpenAI backend lazily, only once a key is given
                # and no backend has been attached yet.
                if api_key_value and not hybrid_service.openai_service:
                    # NOTE(review): assumes OpenAIRealtimeService lives next to
                    # HybridStreamingService in services.openai_realtime_service
                    # — confirm. Imported here so the name is always resolvable;
                    # a failure surfaces through the status box below.
                    from services.openai_realtime_service import (
                        OpenAIRealtimeService,
                    )

                    hybrid_service.openai_service = OpenAIRealtimeService(
                        api_key_value
                    )

                success = await hybrid_service.start_listening(
                    speech_callback=lambda x: None,  # Will be handled via events
                    mode=mode,
                )
            except Exception as e:
                # UI boundary: report the failure in the status box instead of
                # letting the Gradio event crash.
                return False, f"❌ Lỗi: {e}", history

            if success:
                return True, f"✅ Đã kết nối - Chế độ: {mode}", history
            return False, "❌ Không thể kết nối", history

        def stop_streaming():
            """Stop streaming; resets the state flag and clears the chat."""
            hybrid_service.stop_listening()
            return False, "🛑 Đã dừng streaming", []

        def update_chat(history, message, role="user"):
            """Return ``history`` extended with ``message``.

            Any ``role`` other than ``"user"`` is recorded as ``"assistant"``.
            A new list is returned so the caller's list is not mutated.
            """
            speaker = "user" if role == "user" else "assistant"
            return list(history or []) + [{"role": speaker, "content": message}]

        # Event handlers
        start_btn.click(
            start_streaming,
            inputs=[mode_selector, api_key, language_select, chatbot],
            outputs=[connection_state, status_display, chatbot],
        )

        stop_btn.click(
            stop_streaming,
            outputs=[connection_state, status_display, chatbot],
        )

        # Real-time updates via JavaScript. Gradio expects ``js`` to be a
        # single function expression, so the setup code is an arrow function
        # rather than a declaration followed by a call.
        openai_tab.load(
            fn=None,
            inputs=[],
            outputs=[],
            js="""
            () => {
                const eventSource = new EventSource('/outputs');
                eventSource.onmessage = function(event) {
                    const data = JSON.parse(event.data);
                    // Handle real-time updates from OpenAI
                    console.log('OpenAI event:', data);
                };
            }
            """,
        )

    return openai_tab
|
| 174 |
def create_streaming_object_detection():
|
| 175 |
with gr.Blocks() as object_detection_tab:
|
| 176 |
gr.HTML(
|