Prioritize HF Endpoint as primary provider and enhance error handling
Browse files
- app.py +36 -16
- src/llm/factory.py +17 -40
- src/ui/chat_handler.py +50 -19
app.py
CHANGED
|
@@ -16,6 +16,8 @@ from core.errors import translate_error
|
|
| 16 |
from core.personality import personality
|
| 17 |
from src.analytics.user_logger import user_logger
|
| 18 |
from src.analytics.session_analytics import session_analytics
|
|
|
|
|
|
|
| 19 |
import logging
|
| 20 |
|
| 21 |
# Set up logging
|
|
@@ -56,25 +58,43 @@ with st.sidebar:
|
|
| 56 |
st.title("🐱 CosmicCat AI Assistant")
|
| 57 |
st.markdown("Your personal AI-powered assistant with a cosmic twist.")
|
| 58 |
|
| 59 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
model_options = {
|
| 61 |
-
"
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
}
|
| 65 |
-
|
| 66 |
-
|
|
|
|
| 67 |
options=list(model_options.keys()),
|
| 68 |
-
index=0
|
| 69 |
-
on_change=lambda: session_analytics.track_interaction("default_user", st.session_state.session_id, "model_selection", {
|
| 70 |
-
"selected_model": st.session_state.selected_model if 'selected_model' in st.session_state else model_options[list(model_options.keys())[0]]
|
| 71 |
-
})
|
| 72 |
)
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
# Log model selection
|
| 76 |
session_analytics.track_interaction("default_user", st.session_state.session_id, "model_selection", {
|
| 77 |
-
"selected_model":
|
|
|
|
| 78 |
})
|
| 79 |
|
| 80 |
# Cosmic mode toggle
|
|
@@ -269,7 +289,7 @@ user_input = st.chat_input("Type your message here...", key="chat_input")
|
|
| 269 |
if user_input and user_input.strip():
|
| 270 |
# Handle user message display first
|
| 271 |
if not st.session_state.get('is_processing', False):
|
| 272 |
-
chat_handler.process_user_message(user_input,
|
| 273 |
else:
|
| 274 |
st.warning("Still processing your previous request...")
|
| 275 |
|
|
@@ -277,7 +297,7 @@ if user_input and user_input.strip():
|
|
| 277 |
if st.session_state.get('is_processing', False) and st.session_state.get('last_processed_message'):
|
| 278 |
chat_handler.process_ai_response(
|
| 279 |
st.session_state.last_processed_message,
|
| 280 |
-
|
| 281 |
)
|
| 282 |
|
| 283 |
# About tab
|
|
@@ -299,8 +319,8 @@ with tab1:
|
|
| 299 |
When enabled, the AI responds with space-themed language and metaphors.
|
| 300 |
|
| 301 |
### 🛠️ Technical Architecture
|
| 302 |
-
- **Primary model**:
|
| 303 |
-
- **Secondary model**:
|
| 304 |
- **Memory system**: Redis-based session management
|
| 305 |
""")
|
| 306 |
|
|
|
|
| 16 |
from core.personality import personality
|
| 17 |
from src.analytics.user_logger import user_logger
|
| 18 |
from src.analytics.session_analytics import session_analytics
|
| 19 |
+
from src.llm.factory import llm_factory
|
| 20 |
+
from src.services.hf_monitor import hf_monitor
|
| 21 |
import logging
|
| 22 |
|
| 23 |
# Set up logging
|
|
|
|
| 58 |
st.title("🐱 CosmicCat AI Assistant")
|
| 59 |
st.markdown("Your personal AI-powered assistant with a cosmic twist.")
|
| 60 |
|
| 61 |
+
# Show available providers
|
| 62 |
+
available_providers = []
|
| 63 |
+
if config.hf_token and hf_monitor.get_endpoint_status()["available"]:
|
| 64 |
+
available_providers.append("🤗 HF Endpoint (Primary)")
|
| 65 |
+
if config.ollama_host:
|
| 66 |
+
available_providers.append("🦙 Ollama (Local)")
|
| 67 |
+
|
| 68 |
+
# Model selection (now shows actual providers)
|
| 69 |
model_options = {
|
| 70 |
+
"Auto Select": "auto", # Will choose best available
|
| 71 |
+
"🦙 Ollama (Local)": "ollama",
|
| 72 |
+
"🤗 HF Endpoint": "huggingface"
|
| 73 |
}
|
| 74 |
+
|
| 75 |
+
selected_model_key = st.selectbox(
|
| 76 |
+
"Select Provider",
|
| 77 |
options=list(model_options.keys()),
|
| 78 |
+
index=0
|
|
|
|
|
|
|
|
|
|
| 79 |
)
|
| 80 |
+
selected_model_value = model_options[selected_model_key]
|
| 81 |
+
|
| 82 |
+
# Show which provider will actually be used
|
| 83 |
+
actual_provider = "Unknown"
|
| 84 |
+
if selected_model_value == "auto":
|
| 85 |
+
if config.hf_token and hf_monitor.get_endpoint_status()["available"]:
|
| 86 |
+
actual_provider = "🤗 HF Endpoint"
|
| 87 |
+
elif config.ollama_host:
|
| 88 |
+
actual_provider = "🦙 Ollama"
|
| 89 |
+
else:
|
| 90 |
+
actual_provider = "🤗 HF Endpoint" if selected_model_value == "huggingface" else "🦙 Ollama"
|
| 91 |
+
|
| 92 |
+
st.info(f"**Using Provider:** {actual_provider}")
|
| 93 |
|
| 94 |
# Log model selection
|
| 95 |
session_analytics.track_interaction("default_user", st.session_state.session_id, "model_selection", {
|
| 96 |
+
"selected_model": selected_model_value,
|
| 97 |
+
"actual_provider": actual_provider
|
| 98 |
})
|
| 99 |
|
| 100 |
# Cosmic mode toggle
|
|
|
|
| 289 |
if user_input and user_input.strip():
|
| 290 |
# Handle user message display first
|
| 291 |
if not st.session_state.get('is_processing', False):
|
| 292 |
+
chat_handler.process_user_message(user_input, selected_model_value)
|
| 293 |
else:
|
| 294 |
st.warning("Still processing your previous request...")
|
| 295 |
|
|
|
|
| 297 |
if st.session_state.get('is_processing', False) and st.session_state.get('last_processed_message'):
|
| 298 |
chat_handler.process_ai_response(
|
| 299 |
st.session_state.last_processed_message,
|
| 300 |
+
selected_model_value
|
| 301 |
)
|
| 302 |
|
| 303 |
# About tab
|
|
|
|
| 319 |
When enabled, the AI responds with space-themed language and metaphors.
|
| 320 |
|
| 321 |
### 🛠️ Technical Architecture
|
| 322 |
+
- **Primary model**: HF Endpoint (advanced processing)
|
| 323 |
+
- **Secondary model**: Ollama (local processing)
|
| 324 |
- **Memory system**: Redis-based session management
|
| 325 |
""")
|
| 326 |
|
src/llm/factory.py
CHANGED
|
@@ -25,54 +25,31 @@ class LLMFactory:
|
|
| 25 |
def get_provider(self, preferred_provider: Optional[str] = None) -> LLMProvider:
|
| 26 |
"""
|
| 27 |
Get an LLM provider based on preference and availability.
|
|
|
|
| 28 |
"""
|
| 29 |
-
#
|
| 30 |
-
if preferred_provider:
|
| 31 |
-
provider = self._try_provider(preferred_provider)
|
| 32 |
-
if provider:
|
| 33 |
-
return provider
|
| 34 |
-
|
| 35 |
-
# Try HF provider if configured
|
| 36 |
if config.hf_token:
|
| 37 |
-
|
| 38 |
-
if
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
# Try Ollama as fallback
|
| 42 |
-
if config.ollama_host:
|
| 43 |
-
provider = self._try_provider("ollama")
|
| 44 |
-
if provider:
|
| 45 |
-
return provider
|
| 46 |
-
|
| 47 |
-
raise ProviderNotAvailableError("No LLM providers are available or configured")
|
| 48 |
-
|
| 49 |
-
def _try_provider(self, provider_name: str) -> Optional[LLMProvider]:
|
| 50 |
-
"""Try to initialize a specific provider"""
|
| 51 |
-
try:
|
| 52 |
-
if provider_name == "huggingface" and config.hf_token:
|
| 53 |
-
# Check if HF endpoint is available
|
| 54 |
-
status = hf_monitor.get_endpoint_status()
|
| 55 |
-
if status["available"] or status["initializing"]:
|
| 56 |
return HuggingFaceProvider(
|
| 57 |
model_name="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"
|
| 58 |
)
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
elif provider_name == "ollama" and config.ollama_host:
|
| 68 |
return OllamaProvider(
|
| 69 |
model_name=config.local_model_name
|
| 70 |
)
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
return None
|
| 76 |
|
| 77 |
# Global factory instance
|
| 78 |
llm_factory = LLMFactory()
|
|
|
|
| 25 |
def get_provider(self, preferred_provider: Optional[str] = None) -> LLMProvider:
|
| 26 |
"""
|
| 27 |
Get an LLM provider based on preference and availability.
|
| 28 |
+
Priority: HF Endpoint > Ollama > Error
|
| 29 |
"""
|
| 30 |
+
# Check if HF token is available and endpoint is ready
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
if config.hf_token:
|
| 32 |
+
status = hf_monitor.get_endpoint_status()
|
| 33 |
+
if status["available"]:
|
| 34 |
+
try:
|
| 35 |
+
logger.info("Using HF Endpoint as primary provider")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
return HuggingFaceProvider(
|
| 37 |
model_name="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"
|
| 38 |
)
|
| 39 |
+
except Exception as e:
|
| 40 |
+
logger.warning(f"Failed to initialize HF provider: {e}")
|
| 41 |
+
|
| 42 |
+
# Try Ollama as fallback
|
| 43 |
+
if config.ollama_host:
|
| 44 |
+
try:
|
| 45 |
+
logger.info("Using Ollama as provider")
|
|
|
|
|
|
|
| 46 |
return OllamaProvider(
|
| 47 |
model_name=config.local_model_name
|
| 48 |
)
|
| 49 |
+
except Exception as e:
|
| 50 |
+
logger.warning(f"Failed to initialize Ollama provider: {e}")
|
| 51 |
+
|
| 52 |
+
raise ProviderNotAvailableError("No LLM providers are available or configured")
|
|
|
|
| 53 |
|
| 54 |
# Global factory instance
|
| 55 |
llm_factory = LLMFactory()
|
src/ui/chat_handler.py
CHANGED
|
@@ -4,6 +4,7 @@ import logging
|
|
| 4 |
from typing import Optional
|
| 5 |
from src.llm.factory import llm_factory, ProviderNotAvailableError
|
| 6 |
from core.session import session_manager
|
|
|
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
|
@@ -24,18 +25,17 @@ class ChatHandler:
|
|
| 24 |
logger.info("Preventing duplicate message processing")
|
| 25 |
return
|
| 26 |
|
| 27 |
-
# Set processing flag
|
| 28 |
st.session_state.is_processing = True
|
| 29 |
st.session_state.last_processed_message = user_input
|
| 30 |
|
| 31 |
try:
|
| 32 |
-
# Show user message immediately
|
| 33 |
timestamp = time.strftime("%H:%M:%S")
|
| 34 |
with st.chat_message("user"):
|
| 35 |
st.markdown(user_input)
|
| 36 |
st.caption(f"🕒 {timestamp}")
|
| 37 |
|
| 38 |
-
# Add to session state history
|
| 39 |
if "messages" not in st.session_state:
|
| 40 |
st.session_state.messages = []
|
| 41 |
|
|
@@ -45,7 +45,7 @@ class ChatHandler:
|
|
| 45 |
"timestamp": timestamp
|
| 46 |
})
|
| 47 |
|
| 48 |
-
# Force
|
| 49 |
st.experimental_rerun()
|
| 50 |
|
| 51 |
except Exception as e:
|
|
@@ -65,8 +65,8 @@ class ChatHandler:
|
|
| 65 |
response_placeholder = st.empty()
|
| 66 |
|
| 67 |
try:
|
| 68 |
-
# Determine provider based on
|
| 69 |
-
provider_name = self.
|
| 70 |
status_placeholder.info(f"🚀 Contacting {self._get_provider_display_name(provider_name)}...")
|
| 71 |
|
| 72 |
# Get response with timeout handling
|
|
@@ -112,12 +112,32 @@ class ChatHandler:
|
|
| 112 |
logger.error(f"Provider not available: {e}")
|
| 113 |
|
| 114 |
except Exception as e:
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
timestamp = time.strftime("%H:%M:%S")
|
| 118 |
st.session_state.messages.append({
|
| 119 |
"role": "assistant",
|
| 120 |
-
"content":
|
| 121 |
"timestamp": timestamp
|
| 122 |
})
|
| 123 |
logger.error(f"Chat processing error: {e}", exc_info=True)
|
|
@@ -130,16 +150,27 @@ class ChatHandler:
|
|
| 130 |
st.session_state.is_processing = False
|
| 131 |
st.session_state.last_processed_message = ""
|
| 132 |
time.sleep(0.1)
|
| 133 |
-
st.experimental_rerun()
|
| 134 |
|
| 135 |
-
def
|
| 136 |
-
"""Determine
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
def _get_provider_display_name(self, provider_name: str) -> str:
|
| 145 |
"""Get display name for provider"""
|
|
@@ -159,7 +190,7 @@ class ChatHandler:
|
|
| 159 |
# Add current user message
|
| 160 |
conversation_history.append({"role": "user", "content": user_input})
|
| 161 |
|
| 162 |
-
# Get provider
|
| 163 |
provider = llm_factory.get_provider(provider_name)
|
| 164 |
|
| 165 |
# Generate response with timeout
|
|
|
|
| 4 |
from typing import Optional
|
| 5 |
from src.llm.factory import llm_factory, ProviderNotAvailableError
|
| 6 |
from core.session import session_manager
|
| 7 |
+
from utils.config import config
|
| 8 |
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
|
|
|
| 25 |
logger.info("Preventing duplicate message processing")
|
| 26 |
return
|
| 27 |
|
|
|
|
| 28 |
st.session_state.is_processing = True
|
| 29 |
st.session_state.last_processed_message = user_input
|
| 30 |
|
| 31 |
try:
|
| 32 |
+
# Show user message immediately
|
| 33 |
timestamp = time.strftime("%H:%M:%S")
|
| 34 |
with st.chat_message("user"):
|
| 35 |
st.markdown(user_input)
|
| 36 |
st.caption(f"🕒 {timestamp}")
|
| 37 |
|
| 38 |
+
# Add to session state history
|
| 39 |
if "messages" not in st.session_state:
|
| 40 |
st.session_state.messages = []
|
| 41 |
|
|
|
|
| 45 |
"timestamp": timestamp
|
| 46 |
})
|
| 47 |
|
| 48 |
+
# Force UI update
|
| 49 |
st.experimental_rerun()
|
| 50 |
|
| 51 |
except Exception as e:
|
|
|
|
| 65 |
response_placeholder = st.empty()
|
| 66 |
|
| 67 |
try:
|
| 68 |
+
# Determine provider based on selection and availability
|
| 69 |
+
provider_name = self._get_best_provider(selected_model)
|
| 70 |
status_placeholder.info(f"🚀 Contacting {self._get_provider_display_name(provider_name)}...")
|
| 71 |
|
| 72 |
# Get response with timeout handling
|
|
|
|
| 112 |
logger.error(f"Provider not available: {e}")
|
| 113 |
|
| 114 |
except Exception as e:
|
| 115 |
+
# Better user-friendly error messages
|
| 116 |
+
status_placeholder.error("❌ Request failed")
|
| 117 |
+
|
| 118 |
+
# More specific error messages
|
| 119 |
+
if "timeout" in str(e).lower() or "500" in str(e):
|
| 120 |
+
error_message = ("⏰ Request failed. This might be because:\n"
|
| 121 |
+
"• The AI model is taking too long to respond\n"
|
| 122 |
+
"• The provider is overloaded\n\n"
|
| 123 |
+
"**Try one of these solutions:**\n"
|
| 124 |
+
"1. Use the HF Endpoint (🟢 HF Endpoint: Available and ready)\n"
|
| 125 |
+
"2. Wait a moment and try again\n"
|
| 126 |
+
"3. Simplify your question")
|
| 127 |
+
elif "connection" in str(e).lower():
|
| 128 |
+
error_message = ("🔌 Connection failed. This might be because:\n"
|
| 129 |
+
"• Your Ollama server is offline\n"
|
| 130 |
+
"• Incorrect Ollama URL\n"
|
| 131 |
+
"• Network firewall blocking connection\n"
|
| 132 |
+
"• Try using the HF Endpoint instead")
|
| 133 |
+
else:
|
| 134 |
+
error_message = f"Sorry, I encountered an error: {str(e)}"
|
| 135 |
+
|
| 136 |
+
response_placeholder.markdown(error_message)
|
| 137 |
timestamp = time.strftime("%H:%M:%S")
|
| 138 |
st.session_state.messages.append({
|
| 139 |
"role": "assistant",
|
| 140 |
+
"content": error_message,
|
| 141 |
"timestamp": timestamp
|
| 142 |
})
|
| 143 |
logger.error(f"Chat processing error: {e}", exc_info=True)
|
|
|
|
| 150 |
st.session_state.is_processing = False
|
| 151 |
st.session_state.last_processed_message = ""
|
| 152 |
time.sleep(0.1)
|
|
|
|
| 153 |
|
| 154 |
+
def _get_best_provider(self, selected_model: str) -> str:
|
| 155 |
+
"""Determine the best available provider"""
|
| 156 |
+
from src.services.hf_monitor import hf_monitor
|
| 157 |
+
|
| 158 |
+
# If user selected specific provider, try that
|
| 159 |
+
if selected_model == "ollama" and config.ollama_host:
|
| 160 |
+
return "ollama"
|
| 161 |
+
elif selected_model == "huggingface" and config.hf_token:
|
| 162 |
+
return "huggingface"
|
| 163 |
+
|
| 164 |
+
# Auto-select based on availability
|
| 165 |
+
if config.hf_token:
|
| 166 |
+
status = hf_monitor.get_endpoint_status()
|
| 167 |
+
if status["available"]:
|
| 168 |
+
return "huggingface"
|
| 169 |
+
|
| 170 |
+
if config.ollama_host:
|
| 171 |
+
return "ollama"
|
| 172 |
+
|
| 173 |
+
return "ollama" # Default fallback
|
| 174 |
|
| 175 |
def _get_provider_display_name(self, provider_name: str) -> str:
|
| 176 |
"""Get display name for provider"""
|
|
|
|
| 190 |
# Add current user message
|
| 191 |
conversation_history.append({"role": "user", "content": user_input})
|
| 192 |
|
| 193 |
+
# Get provider (with intelligent fallback)
|
| 194 |
provider = llm_factory.get_provider(provider_name)
|
| 195 |
|
| 196 |
# Generate response with timeout
|