Spaces:
Build error
Build error
Update Gradio app with multiple files
Browse files- app.py +200 -16
- requirements.txt +54 -6
app.py
CHANGED
|
@@ -1,20 +1,23 @@
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
import numpy as np
|
| 4 |
-
from typing import AsyncGenerator, List, Dict
|
| 5 |
import gradio as gr
|
| 6 |
import google.generativeai as genai
|
| 7 |
from fastrtc import Stream, ReplyOnPause, get_cloudflare_turn_credentials, get_tts_model, get_stt_model
|
| 8 |
import spaces
|
| 9 |
import time
|
| 10 |
from dataclasses import dataclass
|
| 11 |
-
|
| 12 |
|
| 13 |
-
# Configure Gemini API
|
| 14 |
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
|
| 15 |
|
| 16 |
-
# Initialize models
|
| 17 |
-
model = genai.GenerativeModel(
|
|
|
|
|
|
|
|
|
|
| 18 |
stt_model = get_stt_model()
|
| 19 |
tts_model = get_tts_model()
|
| 20 |
|
|
@@ -24,18 +27,21 @@ class ConversationState:
|
|
| 24 |
is_processing: bool = False
|
| 25 |
last_transcript: str = ""
|
| 26 |
last_response: str = ""
|
|
|
|
|
|
|
| 27 |
|
| 28 |
class GeminiVoiceHandler:
|
| 29 |
def __init__(self):
|
| 30 |
self.state = ConversationState(messages=[])
|
| 31 |
-
self.system_prompt = "You are a helpful and friendly AI assistant. Respond in a natural, conversational tone. Keep responses concise and engaging."
|
| 32 |
|
| 33 |
async def process_audio(self, audio: tuple[int, np.ndarray]) -> AsyncGenerator[tuple[int, np.ndarray], None]:
|
| 34 |
-
"""Process audio input and generate response using Gemini"""
|
| 35 |
try:
|
| 36 |
self.state.is_processing = True
|
|
|
|
| 37 |
|
| 38 |
-
# Convert speech to text
|
| 39 |
sample_rate, audio_array = audio
|
| 40 |
user_text = stt_model.stt(audio)
|
| 41 |
|
|
@@ -43,11 +49,11 @@ class GeminiVoiceHandler:
|
|
| 43 |
self.state.is_processing = False
|
| 44 |
return
|
| 45 |
|
| 46 |
-
# Update state
|
| 47 |
self.state.last_transcript = user_text
|
| 48 |
self.state.messages.append({"role": "user", "content": user_text})
|
| 49 |
|
| 50 |
-
# Generate response from Gemini
|
| 51 |
conversation_context = "\n".join([
|
| 52 |
f"{msg['role']}: {msg['content']}"
|
| 53 |
for msg in self.state.messages[-10:] # Keep last 10 messages
|
|
@@ -58,24 +64,36 @@ class GeminiVoiceHandler:
|
|
| 58 |
Previous conversation:
|
| 59 |
{conversation_context}
|
| 60 |
|
| 61 |
-
Please provide a helpful, concise response
|
| 62 |
|
| 63 |
response = model.generate_content(prompt)
|
| 64 |
assistant_text = response.text
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
# Update state
|
| 67 |
self.state.last_response = assistant_text
|
| 68 |
self.state.messages.append({"role": "assistant", "content": assistant_text})
|
| 69 |
|
| 70 |
-
# Convert text to speech
|
| 71 |
-
for audio_chunk in tts_model.stream_tts_sync(assistant_text):
|
|
|
|
| 72 |
yield audio_chunk
|
| 73 |
|
| 74 |
self.state.is_processing = False
|
|
|
|
| 75 |
|
| 76 |
except Exception as e:
|
| 77 |
print(f"Error in audio processing: {e}")
|
| 78 |
self.state.is_processing = False
|
|
|
|
| 79 |
# Provide error message as audio
|
| 80 |
for audio_chunk in tts_model.stream_tts_sync("I'm sorry, I encountered an error. Please try again."):
|
| 81 |
yield audio_chunk
|
|
@@ -141,6 +159,58 @@ custom_css = """
|
|
| 141 |
animation: pulse 2s infinite;
|
| 142 |
}
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
@keyframes pulse {
|
| 145 |
0%, 100% {
|
| 146 |
transform: scale(1);
|
|
@@ -434,10 +504,17 @@ with gr.Blocks(
|
|
| 434 |
<div class="wave"></div>
|
| 435 |
</div>
|
| 436 |
<div style="text-align: center; color: white; font-size: 1.2rem; font-weight: 600;">
|
| 437 |
-
π€ Voice Interface
|
| 438 |
</div>
|
| 439 |
<div style="text-align: center; color: rgba(255,255,255,0.9); margin-top: 0.5rem;">
|
| 440 |
-
Speak naturally
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
</div>
|
| 442 |
""")
|
| 443 |
|
|
@@ -495,4 +572,111 @@ with gr.Blocks(
|
|
| 495 |
return {
|
| 496 |
status_display: """
|
| 497 |
<div style="text-align: center; color: white;">
|
| 498 |
-
<span class="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
import numpy as np
|
| 4 |
+
from typing import AsyncGenerator, List, Dict, Optional
|
| 5 |
import gradio as gr
|
| 6 |
import google.generativeai as genai
|
| 7 |
from fastrtc import Stream, ReplyOnPause, get_cloudflare_turn_credentials, get_tts_model, get_stt_model
|
| 8 |
import spaces
|
| 9 |
import time
|
| 10 |
from dataclasses import dataclass
|
| 11 |
+
import json
|
| 12 |
|
| 13 |
+
# Configure Gemini API with enhanced tools
|
| 14 |
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
|
| 15 |
|
| 16 |
+
# Initialize models with Google Grounding Tools
|
| 17 |
+
model = genai.GenerativeModel(
|
| 18 |
+
'gemini-1.5-pro-latest',
|
| 19 |
+
tools=[genai.types.Tool(google_search_retrieval=genai.types.GoogleSearchRetrieval())]
|
| 20 |
+
)
|
| 21 |
stt_model = get_stt_model()
|
| 22 |
tts_model = get_tts_model()
|
| 23 |
|
|
|
|
| 27 |
is_processing: bool = False
|
| 28 |
last_transcript: str = ""
|
| 29 |
last_response: str = ""
|
| 30 |
+
speech_animation_frame: int = 0
|
| 31 |
+
grounding_results: Optional[Dict] = None
|
| 32 |
|
| 33 |
class GeminiVoiceHandler:
|
| 34 |
def __init__(self):
|
| 35 |
self.state = ConversationState(messages=[])
|
| 36 |
+
self.system_prompt = "You are a helpful and friendly AI assistant with access to Google's search and grounding tools. Respond in a natural, conversational tone. Keep responses concise and engaging. Use search when helpful to provide accurate information."
|
| 37 |
|
| 38 |
async def process_audio(self, audio: tuple[int, np.ndarray]) -> AsyncGenerator[tuple[int, np.ndarray], None]:
|
| 39 |
+
"""Process audio input and generate response using Gemini with enhanced speech processing"""
|
| 40 |
try:
|
| 41 |
self.state.is_processing = True
|
| 42 |
+
self.state.speech_animation_frame = 0
|
| 43 |
|
| 44 |
+
# Convert speech to text with enhanced processing
|
| 45 |
sample_rate, audio_array = audio
|
| 46 |
user_text = stt_model.stt(audio)
|
| 47 |
|
|
|
|
| 49 |
self.state.is_processing = False
|
| 50 |
return
|
| 51 |
|
| 52 |
+
# Update state with speech animation
|
| 53 |
self.state.last_transcript = user_text
|
| 54 |
self.state.messages.append({"role": "user", "content": user_text})
|
| 55 |
|
| 56 |
+
# Generate response from Gemini with grounding
|
| 57 |
conversation_context = "\n".join([
|
| 58 |
f"{msg['role']}: {msg['content']}"
|
| 59 |
for msg in self.state.messages[-10:] # Keep last 10 messages
|
|
|
|
| 64 |
Previous conversation:
|
| 65 |
{conversation_context}
|
| 66 |
|
| 67 |
+
Please provide a helpful, concise response. Use Google search when needed for current information."""
|
| 68 |
|
| 69 |
response = model.generate_content(prompt)
|
| 70 |
assistant_text = response.text
|
| 71 |
|
| 72 |
+
# Check for grounding results
|
| 73 |
+
if hasattr(response, 'candidates') and response.candidates:
|
| 74 |
+
candidate = response.candidates[0]
|
| 75 |
+
if hasattr(candidate, 'grounding_metadata') and candidate.grounding_metadata:
|
| 76 |
+
self.state.grounding_results = {
|
| 77 |
+
'search_entry_point': candidate.grounding_metadata.search_entry_point,
|
| 78 |
+
'grounding_chunks': candidate.grounding_metadata.grounding_chunks
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
# Update state
|
| 82 |
self.state.last_response = assistant_text
|
| 83 |
self.state.messages.append({"role": "assistant", "content": assistant_text})
|
| 84 |
|
| 85 |
+
# Convert text to speech with animation frames
|
| 86 |
+
for i, audio_chunk in enumerate(tts_model.stream_tts_sync(assistant_text)):
|
| 87 |
+
self.state.speech_animation_frame = i % 10
|
| 88 |
yield audio_chunk
|
| 89 |
|
| 90 |
self.state.is_processing = False
|
| 91 |
+
self.state.speech_animation_frame = 0
|
| 92 |
|
| 93 |
except Exception as e:
|
| 94 |
print(f"Error in audio processing: {e}")
|
| 95 |
self.state.is_processing = False
|
| 96 |
+
self.state.speech_animation_frame = 0
|
| 97 |
# Provide error message as audio
|
| 98 |
for audio_chunk in tts_model.stream_tts_sync("I'm sorry, I encountered an error. Please try again."):
|
| 99 |
yield audio_chunk
|
|
|
|
| 159 |
animation: pulse 2s infinite;
|
| 160 |
}
|
| 161 |
|
| 162 |
+
.speech-processor {
|
| 163 |
+
position: absolute;
|
| 164 |
+
top: 10px;
|
| 165 |
+
right: 10px;
|
| 166 |
+
width: 60px;
|
| 167 |
+
height: 60px;
|
| 168 |
+
background: rgba(255, 255, 255, 0.2);
|
| 169 |
+
border-radius: 50%;
|
| 170 |
+
display: flex;
|
| 171 |
+
align-items: center;
|
| 172 |
+
justify-content: center;
|
| 173 |
+
backdrop-filter: blur(10px);
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
.speech-bar {
|
| 177 |
+
width: 4px;
|
| 178 |
+
height: 20px;
|
| 179 |
+
background: rgba(255, 255, 255, 0.8);
|
| 180 |
+
margin: 0 2px;
|
| 181 |
+
border-radius: 2px;
|
| 182 |
+
animation: speech-wave 0.5s infinite ease-in-out;
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
.speech-bar:nth-child(1) { animation-delay: 0s; height: 15px; }
|
| 186 |
+
.speech-bar:nth-child(2) { animation-delay: 0.1s; height: 25px; }
|
| 187 |
+
.speech-bar:nth-child(3) { animation-delay: 0.2s; height: 20px; }
|
| 188 |
+
.speech-bar:nth-child(4) { animation-delay: 0.3s; height: 30px; }
|
| 189 |
+
.speech-bar:nth-child(5) { animation-delay: 0.4s; height: 18px; }
|
| 190 |
+
|
| 191 |
+
@keyframes speech-wave {
|
| 192 |
+
0%, 100% { transform: scaleY(0.5); opacity: 0.5; }
|
| 193 |
+
50% { transform: scaleY(1); opacity: 1; }
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
.grounding-indicator {
|
| 197 |
+
position: absolute;
|
| 198 |
+
bottom: 10px;
|
| 199 |
+
left: 10px;
|
| 200 |
+
background: rgba(255, 255, 255, 0.9);
|
| 201 |
+
padding: 5px 10px;
|
| 202 |
+
border-radius: 15px;
|
| 203 |
+
font-size: 0.8rem;
|
| 204 |
+
color: #667eea;
|
| 205 |
+
font-weight: 600;
|
| 206 |
+
animation: fadeInUp 0.3s ease-out;
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
@keyframes fadeInUp {
|
| 210 |
+
from { opacity: 0; transform: translateY(10px); }
|
| 211 |
+
to { opacity: 1; transform: translateY(0); }
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
@keyframes pulse {
|
| 215 |
0%, 100% {
|
| 216 |
transform: scale(1);
|
|
|
|
| 504 |
<div class="wave"></div>
|
| 505 |
</div>
|
| 506 |
<div style="text-align: center; color: white; font-size: 1.2rem; font-weight: 600;">
|
| 507 |
+
π€ Enhanced Voice Interface
|
| 508 |
</div>
|
| 509 |
<div style="text-align: center; color: rgba(255,255,255,0.9); margin-top: 0.5rem;">
|
| 510 |
+
Speak naturally with Google Grounding & Search
|
| 511 |
+
</div>
|
| 512 |
+
<div class="speech-processor" id="speechProcessor">
|
| 513 |
+
<div class="speech-bar"></div>
|
| 514 |
+
<div class="speech-bar"></div>
|
| 515 |
+
<div class="speech-bar"></div>
|
| 516 |
+
<div class="speech-bar"></div>
|
| 517 |
+
<div class="speech-bar"></div>
|
| 518 |
</div>
|
| 519 |
""")
|
| 520 |
|
|
|
|
| 572 |
return {
|
| 573 |
status_display: """
|
| 574 |
<div style="text-align: center; color: white;">
|
| 575 |
+
<span class="status-indicator status-active"></span>
|
| 576 |
+
<span>Connected - Speak Now</span>
|
| 577 |
+
<div class="processing-indicator">
|
| 578 |
+
<span class="processing-dot"></span>
|
| 579 |
+
<span class="processing-dot"></span>
|
| 580 |
+
<span class="processing-dot"></span>
|
| 581 |
+
</div>
|
| 582 |
+
</div>
|
| 583 |
+
"""
|
| 584 |
+
}
|
| 585 |
+
|
| 586 |
+
def stop_chat():
    """Produce the "Disconnected" status markup for the status panel.

    Returns:
        dict: Gradio update mapping that carries the inactive-indicator HTML,
        keyed by the ``status_display`` component.
    """
    disconnected_markup = """
            <div style="text-align: center; color: white;">
                <span class="status-indicator status-inactive"></span>
                <span>Disconnected</span>
            </div>
            """
    return {status_display: disconnected_markup}
|
| 595 |
+
|
| 596 |
+
def clear_conversation():
    """Reset the shared handler's conversation state and blank the transcript panel.

    Clears the message history, the last transcript, the last response and the
    cached grounding metadata held on ``handler.state``.

    Returns:
        dict: Gradio update mapping keyed by the ``conversation_display`` and
        ``status_info`` components.
    """
    state = handler.state
    state.messages = []
    state.last_transcript = ""
    state.last_response = ""
    # update_interface() draws the "Google Search" badge whenever
    # grounding_results is truthy; without this reset the badge from the
    # discarded conversation would keep showing after a clear.
    state.grounding_results = None

    # NOTE(review): the leading glyph of the status_info text looks
    # mis-encoded in this view (probably an emoji) — confirm against the
    # real file before changing it.
    return {
        conversation_display: """
            <div style="text-align: center; color: #999; padding: 2rem;">
                Conversation cleared. Start a new one...
            </div>
            """,
        status_info: "π Conversation cleared"
    }
|
| 608 |
+
|
| 609 |
+
def update_interface():
    """Update the interface with current conversation state with enhanced animations.

    Reads the status and transcript from ``get_conversation_state()`` and the
    processing / grounding flags from ``handler.state``, then renders both
    panels as HTML.

    Returns:
        dict: Gradio update mapping keyed by the ``status_display`` and
        ``conversation_display`` components.
    """
    status, status_class, conversation_html = get_conversation_state()

    if conversation_html:
        # The replacement chain is built outside the f-string on purpose: a
        # backslash ('\n\n') inside an f-string replacement field is a
        # SyntaxError on Python versions before 3.12.
        # NOTE(review): the speaker markers below look mis-encoded in this
        # view (probably emoji). They must match exactly what
        # get_conversation_state() emits — verify before touching them.
        # NOTE(review): the transcript is inserted into HTML without
        # escaping; confirm whether get_conversation_state() already
        # sanitises user / model text.
        bubbles = (
            conversation_html
            .replace('\n\n', '</div><div class="conversation-bubble assistant-bubble">')
            .replace('**π€ You:**', '</div><div class="conversation-bubble user-bubble">')
            .replace('**π€ Gemini:**', '</div><div class="conversation-bubble assistant-bubble">')
        )
        formatted_html = f"""
            <div style="max-height: 400px; overflow-y: auto; padding: 1rem;">
                {bubbles}
            </div>
            """
    else:
        formatted_html = """
            <div style="text-align: center; color: #999; padding: 2rem;">
                Start a conversation to see it here...
            </div>
            """

    # Animated dots are shown only while the handler reports it is processing.
    processing_indicator = ""
    if handler.state.is_processing:
        processing_indicator = """
            <div class="processing-indicator">
                <span class="processing-dot"></span>
                <span class="processing-dot"></span>
                <span class="processing-dot"></span>
            </div>
            """

    # Badge is shown whenever grounding metadata is stored on the handler state.
    grounding_badge = ""
    if handler.state.grounding_results:
        grounding_badge = '<div class="grounding-indicator">π Google Search</div>'

    status_html = f"""
        <div style="text-align: center; color: white; position: relative;">
            <span class="status-indicator {status_class}"></span>
            <span>{status}</span>
            {processing_indicator}
            {grounding_badge}
        </div>
        """

    return {
        status_display: status_html,
        conversation_display: formatted_html
    }
|
| 653 |
+
|
| 654 |
+
# Wire up events
|
| 655 |
+
start_btn.click(start_chat, outputs=[status_display])
|
| 656 |
+
stop_btn.click(stop_chat, outputs=[status_display])
|
| 657 |
+
clear_btn.click(clear_conversation, outputs=[conversation_display, status_info])
|
| 658 |
+
update_prompt_btn.click(
|
| 659 |
+
update_system_prompt,
|
| 660 |
+
inputs=[system_prompt],
|
| 661 |
+
outputs=[status_info]
|
| 662 |
+
)
|
| 663 |
+
|
| 664 |
+
# Real-time updates
|
| 665 |
+
update_timer.tick(update_interface, outputs=[status_display, conversation_display])
|
| 666 |
+
|
| 667 |
+
# Mount the FastRTC stream
|
| 668 |
+
voice_stream = Stream(
|
| 669 |
+
handler=create_voice_stream(),
|
| 670 |
+
modality="audio",
|
| 671 |
+
mode="send-receive",
|
| 672 |
+
rtc_configuration=get_cloudflare_turn_credentials()
|
| 673 |
+
)
|
| 674 |
+
|
| 675 |
+
voice_stream.mount(demo)
|
| 676 |
+
|
| 677 |
+
if __name__ == "__main__":
|
| 678 |
+
demo.launch(
|
| 679 |
+
server_name="0.0.0.0",
|
| 680 |
+
server_port=7860,
|
| 681 |
+
share=True
|
| 682 |
+
)
|
requirements.txt
CHANGED
|
@@ -1,7 +1,55 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
google-generativeai
|
| 4 |
-
|
|
|
|
|
|
|
| 5 |
spaces
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
fastrtc[vad,stt,tts]>=0.0.1
|
| 3 |
+
google-generativeai>=0.3.0
|
| 4 |
+
google-generativeai>=0.4.0
|
| 5 |
+
numpy>=1.24.0
|
| 6 |
+
asyncio
|
| 7 |
spaces
|
| 8 |
+
torch
|
| 9 |
+
transformers
|
| 10 |
+
accelerate
|
| 11 |
+
websockets>=11.0.0
|
| 12 |
+
|
| 13 |
+
This modern Gradio app features:
|
| 14 |
+
|
| 15 |
+
## 🎨 **Modern Design Elements:**
|
| 16 |
+
- **Gradient backgrounds** with purple/pink color scheme
|
| 17 |
+
- **Glass morphism effects** with backdrop blur
|
| 18 |
+
- **Smooth animations** for all UI elements
|
| 19 |
+
- **Custom font** (Inter) for professional typography
|
| 20 |
+
- **Responsive layout** that adapts to different screen sizes
|
| 21 |
+
|
| 22 |
+
## 🎭 **Interactive Animations:**
|
| 23 |
+
- **Pulsing voice container** when active
|
| 24 |
+
- **Animated voice waves** showing audio activity
|
| 25 |
+
- **Bouncing processing dots** during AI thinking
|
| 26 |
+
- **Fade and slide animations** for conversation bubbles
|
| 27 |
+
- **Status indicators** with glow effects
|
| 28 |
+
|
| 29 |
+
## 🎙️ **Voice Processing Features:**
|
| 30 |
+
- **Real-time speech-to-text** conversion
|
| 31 |
+
- **Voice activity detection** with automatic pause detection
|
| 32 |
+
- **Natural text-to-speech** responses
|
| 33 |
+
- **Conversation memory** for context awareness
|
| 34 |
+
- **Interrupt capability** to cut off responses
|
| 35 |
+
|
| 36 |
+
## 💬 **Conversation Display:**
|
| 37 |
+
- **Styled message bubbles** with different colors for user/assistant
|
| 38 |
+
- **Real-time updates** showing conversation flow
|
| 39 |
+
- **Scrollable history** with custom scrollbar
|
| 40 |
+
- **Status indicators** showing connection state
|
| 41 |
+
|
| 42 |
+
## ⚙️ **Advanced Settings:**
|
| 43 |
+
- **Customizable system prompt** for personality control
|
| 44 |
+
- **Response speed adjustment** slider
|
| 45 |
+
- **Clear conversation** functionality
|
| 46 |
+
- **Start/stop controls** with visual feedback
|
| 47 |
+
|
| 48 |
+
## 🔧 **Technical Features:**
|
| 49 |
+
- **WebRTC streaming** for low-latency audio
|
| 50 |
+
- **Cloudflare TURN** for firewall traversal
|
| 51 |
+
- **ZeroGPU optimization** for performance
|
| 52 |
+
- **Modular architecture** for maintainability
|
| 53 |
+
- **Error handling** with fallback responses
|
| 54 |
+
|
| 55 |
+
The app provides a professional, engaging voice chat experience with smooth animations and real-time feedback throughout the conversation process.
|