rdune71 committed
Commit 972f028 · 1 Parent(s): 953f580

Prioritize HF Endpoint as primary provider and enhance error handling

Files changed (3)
  1. app.py +36 -16
  2. src/llm/factory.py +17 -40
  3. src/ui/chat_handler.py +50 -19
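
In short, provider selection now prefers the HF Endpoint and falls back to local Ollama before erroring out. A minimal sketch of that order, condensed from the factory.py change below (the helper function here is illustrative and not part of the commit; `config`, `hf_monitor`, and `ProviderNotAvailableError` are the project's own objects):

```python
from utils.config import config
from src.services.hf_monitor import hf_monitor
from src.llm.factory import ProviderNotAvailableError

def pick_provider_name() -> str:
    """Illustrative condensation of the new priority: HF Endpoint > Ollama > error."""
    if config.hf_token and hf_monitor.get_endpoint_status()["available"]:
        return "huggingface"   # primary: hosted HF Endpoint
    if config.ollama_host:
        return "ollama"        # fallback: local Ollama
    raise ProviderNotAvailableError("No LLM providers are available or configured")
```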
app.py CHANGED
```diff
@@ -16,6 +16,8 @@ from core.errors import translate_error
 from core.personality import personality
 from src.analytics.user_logger import user_logger
 from src.analytics.session_analytics import session_analytics
+from src.llm.factory import llm_factory
+from src.services.hf_monitor import hf_monitor
 import logging
 
 # Set up logging
@@ -56,25 +58,43 @@ with st.sidebar:
     st.title("🐱 CosmicCat AI Assistant")
     st.markdown("Your personal AI-powered assistant with a cosmic twist.")
 
-    # Model selection
+    # Show available providers
+    available_providers = []
+    if config.hf_token and hf_monitor.get_endpoint_status()["available"]:
+        available_providers.append("🤗 HF Endpoint (Primary)")
+    if config.ollama_host:
+        available_providers.append("🦙 Ollama (Local)")
+
+    # Model selection (now shows actual providers)
     model_options = {
-        "Mistral 7B (Local)": "mistral:latest",
-        "Llama 2 7B (Local)": "llama2:latest",
-        "OpenChat 3.5 (Local)": "openchat:latest"
+        "Auto Select": "auto",  # Will choose best available
+        "🦙 Ollama (Local)": "ollama",
+        "🤗 HF Endpoint": "huggingface"
     }
-    selected_model_name = st.selectbox(
-        "Select Model",
+
+    selected_model_key = st.selectbox(
+        "Select Provider",
         options=list(model_options.keys()),
-        index=0,
-        on_change=lambda: session_analytics.track_interaction("default_user", st.session_state.session_id, "model_selection", {
-            "selected_model": st.session_state.selected_model if 'selected_model' in st.session_state else model_options[list(model_options.keys())[0]]
-        })
+        index=0
     )
-    st.session_state.selected_model = model_options[selected_model_name]
+    selected_model_value = model_options[selected_model_key]
+
+    # Show which provider will actually be used
+    actual_provider = "Unknown"
+    if selected_model_value == "auto":
+        if config.hf_token and hf_monitor.get_endpoint_status()["available"]:
+            actual_provider = "🤗 HF Endpoint"
+        elif config.ollama_host:
+            actual_provider = "🦙 Ollama"
+    else:
+        actual_provider = "🤗 HF Endpoint" if selected_model_value == "huggingface" else "🦙 Ollama"
+
+    st.info(f"**Using Provider:** {actual_provider}")
 
     # Log model selection
     session_analytics.track_interaction("default_user", st.session_state.session_id, "model_selection", {
-        "selected_model": st.session_state.selected_model
+        "selected_model": selected_model_value,
+        "actual_provider": actual_provider
    })
 
     # Cosmic mode toggle
@@ -269,7 +289,7 @@ user_input = st.chat_input("Type your message here...", key="chat_input")
 if user_input and user_input.strip():
     # Handle user message display first
     if not st.session_state.get('is_processing', False):
-        chat_handler.process_user_message(user_input, selected_model_name)
+        chat_handler.process_user_message(user_input, selected_model_value)
     else:
         st.warning("Still processing your previous request...")
 
@@ -277,7 +297,7 @@ if user_input and user_input.strip():
 if st.session_state.get('is_processing', False) and st.session_state.get('last_processed_message'):
     chat_handler.process_ai_response(
         st.session_state.last_processed_message,
-        selected_model_name
+        selected_model_value
     )
 
 # About tab
@@ -299,8 +319,8 @@ with tab1:
     When enabled, the AI responds with space-themed language and metaphors.
 
     ### 🛠️ Technical Architecture
-    - **Primary model**: Ollama (local processing)
-    - **Secondary model**: HF Endpoint (advanced processing)
+    - **Primary model**: HF Endpoint (advanced processing)
+    - **Secondary model**: Ollama (local processing)
     - **Memory system**: Redis-based session management
     """)
```
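
Both the sidebar above and the factory below key off `hf_monitor.get_endpoint_status()["available"]`. The monitor itself is not part of this diff, so a hypothetical stand-in for local experimentation might look like this (class name and constructor are made up):

```python
class FakeHFMonitor:
    """Hypothetical stand-in for src/services/hf_monitor, for local testing only."""

    def __init__(self, available: bool = True):
        self._available = available

    def get_endpoint_status(self) -> dict:
        # This commit only reads the "available" key from the status dict.
        return {"available": self._available}

hf_monitor = FakeHFMonitor(available=False)  # simulate a scaled-down endpoint
print(hf_monitor.get_endpoint_status())      # {'available': False}
```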
 
src/llm/factory.py CHANGED
```diff
@@ -25,54 +25,31 @@ class LLMFactory:
     def get_provider(self, preferred_provider: Optional[str] = None) -> LLMProvider:
         """
         Get an LLM provider based on preference and availability.
+        Priority: HF Endpoint > Ollama > Error
         """
-        # Try preferred provider first
-        if preferred_provider:
-            provider = self._try_provider(preferred_provider)
-            if provider:
-                return provider
-
-        # Try HF provider if configured
+        # Check if HF token is available and endpoint is ready
         if config.hf_token:
-            provider = self._try_provider("huggingface")
-            if provider:
-                return provider
-
-        # Try Ollama as fallback
-        if config.ollama_host:
-            provider = self._try_provider("ollama")
-            if provider:
-                return provider
-
-        raise ProviderNotAvailableError("No LLM providers are available or configured")
-
-    def _try_provider(self, provider_name: str) -> Optional[LLMProvider]:
-        """Try to initialize a specific provider"""
-        try:
-            if provider_name == "huggingface" and config.hf_token:
-                # Check if HF endpoint is available
-                status = hf_monitor.get_endpoint_status()
-                if status["available"] or status["initializing"]:
+            status = hf_monitor.get_endpoint_status()
+            if status["available"]:
+                try:
+                    logger.info("Using HF Endpoint as primary provider")
                     return HuggingFaceProvider(
                         model_name="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"
                     )
-                elif status["status"] == "scaled_to_zero":
-                    # Attempt to wake up the endpoint
-                    logger.info("Attempting to wake up HF endpoint...")
-                    if hf_monitor.attempt_wake_up():
-                        return HuggingFaceProvider(
-                            model_name="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"
-                        )
-
-            elif provider_name == "ollama" and config.ollama_host:
+                except Exception as e:
+                    logger.warning(f"Failed to initialize HF provider: {e}")
+
+        # Try Ollama as fallback
+        if config.ollama_host:
+            try:
+                logger.info("Using Ollama as provider")
                 return OllamaProvider(
                     model_name=config.local_model_name
                 )
-
-        except Exception as e:
-            logger.warning(f"Failed to initialize {provider_name} provider: {e}")
-
-        return None
+            except Exception as e:
+                logger.warning(f"Failed to initialize Ollama provider: {e}")
+
+        raise ProviderNotAvailableError("No LLM providers are available or configured")
 
 # Global factory instance
 llm_factory = LLMFactory()
```
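
A short usage sketch of the simplified factory. The provider classes' call interface is not shown in this diff, so the example only inspects which provider the factory returned:

```python
from src.llm.factory import llm_factory, ProviderNotAvailableError

try:
    provider = llm_factory.get_provider()  # tries HF Endpoint first, then Ollama
except ProviderNotAvailableError as err:
    print(f"No LLM provider available: {err}")
else:
    print(f"Selected provider: {provider.__class__.__name__}")
```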
src/ui/chat_handler.py CHANGED
```diff
@@ -4,6 +4,7 @@ import logging
 from typing import Optional
 from src.llm.factory import llm_factory, ProviderNotAvailableError
 from core.session import session_manager
+from utils.config import config
 
 logger = logging.getLogger(__name__)
 
@@ -24,18 +25,17 @@ class ChatHandler:
             logger.info("Preventing duplicate message processing")
             return
 
-        # Set processing flag
         st.session_state.is_processing = True
         st.session_state.last_processed_message = user_input
 
         try:
-            # Show user message immediately (this was missing!)
+            # Show user message immediately
             timestamp = time.strftime("%H:%M:%S")
             with st.chat_message("user"):
                 st.markdown(user_input)
                 st.caption(f"🕒 {timestamp}")
 
-            # Add to session state history immediately
+            # Add to session state history
             if "messages" not in st.session_state:
                 st.session_state.messages = []
 
@@ -45,7 +45,7 @@ class ChatHandler:
                 "timestamp": timestamp
             })
 
-            # Force immediate UI update
+            # Force UI update
             st.experimental_rerun()
 
         except Exception as e:
@@ -65,8 +65,8 @@ class ChatHandler:
         response_placeholder = st.empty()
 
         try:
-            # Determine provider based on model selection
-            provider_name = self._get_provider_for_model(selected_model)
+            # Determine provider based on selection and availability
+            provider_name = self._get_best_provider(selected_model)
             status_placeholder.info(f"🚀 Contacting {self._get_provider_display_name(provider_name)}...")
 
             # Get response with timeout handling
@@ -112,12 +112,32 @@ class ChatHandler:
             logger.error(f"Provider not available: {e}")
 
         except Exception as e:
-            status_placeholder.error(f"❌ Error: {str(e)[:100]}...")
-            response_placeholder.markdown(f"Sorry, I encountered an error: {str(e)[:100]}...")
+            # Better user-friendly error messages
+            status_placeholder.error("❌ Request failed")
+
+            # More specific error messages
+            if "timeout" in str(e).lower() or "500" in str(e):
+                error_message = ("⏰ Request failed. This might be because:\n"
+                                 "• The AI model is taking too long to respond\n"
+                                 "• The provider is overloaded\n\n"
+                                 "**Try one of these solutions:**\n"
+                                 "1. Use the HF Endpoint (🟢 HF Endpoint: Available and ready)\n"
+                                 "2. Wait a moment and try again\n"
+                                 "3. Simplify your question")
+            elif "connection" in str(e).lower():
+                error_message = ("🔌 Connection failed. This might be because:\n"
+                                 "• Your Ollama server is offline\n"
+                                 "• Incorrect Ollama URL\n"
+                                 "• Network firewall blocking connection\n"
+                                 "• Try using the HF Endpoint instead")
+            else:
+                error_message = f"Sorry, I encountered an error: {str(e)}"
+
+            response_placeholder.markdown(error_message)
             timestamp = time.strftime("%H:%M:%S")
             st.session_state.messages.append({
                 "role": "assistant",
-                "content": f"Sorry, I encountered an error: {str(e)[:100]}...",
+                "content": error_message,
                 "timestamp": timestamp
             })
             logger.error(f"Chat processing error: {e}", exc_info=True)
@@ -130,16 +150,27 @@ class ChatHandler:
             st.session_state.is_processing = False
             st.session_state.last_processed_message = ""
             time.sleep(0.1)
-            st.experimental_rerun()
 
-    def _get_provider_for_model(self, selected_model: str) -> str:
-        """Determine which provider to use based on model selection"""
-        model_to_provider = {
-            "Mistral 7B (Local)": "ollama",
-            "Llama 2 7B (Local)": "ollama",
-            "OpenChat 3.5 (Local)": "ollama"
-        }
-        return model_to_provider.get(selected_model, "ollama")
+    def _get_best_provider(self, selected_model: str) -> str:
+        """Determine the best available provider"""
+        from src.services.hf_monitor import hf_monitor
+
+        # If user selected specific provider, try that
+        if selected_model == "ollama" and config.ollama_host:
+            return "ollama"
+        elif selected_model == "huggingface" and config.hf_token:
+            return "huggingface"
+
+        # Auto-select based on availability
+        if config.hf_token:
+            status = hf_monitor.get_endpoint_status()
+            if status["available"]:
+                return "huggingface"
+
+        if config.ollama_host:
+            return "ollama"
+
+        return "ollama"  # Default fallback
 
     def _get_provider_display_name(self, provider_name: str) -> str:
         """Get display name for provider"""
@@ -159,7 +190,7 @@ class ChatHandler:
         # Add current user message
         conversation_history.append({"role": "user", "content": user_input})
 
-        # Get provider
+        # Get provider (with intelligent fallback)
         provider = llm_factory.get_provider(provider_name)
 
         # Generate response with timeout
```
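
The new except-branch in process_ai_response sorts failures into three user-facing messages by substring matching on the exception text. A self-contained illustration of that classification (the sample exceptions are made up):

```python
samples = [
    TimeoutError("HTTP 500: model timeout"),
    ConnectionError("connection refused by Ollama host"),
    ValueError("unexpected response format"),
]

for exc in samples:
    text = str(exc).lower()
    if "timeout" in text or "500" in str(exc):
        kind = "timeout/overload guidance (suggests HF Endpoint, retry, simpler prompt)"
    elif "connection" in text:
        kind = "connection guidance (Ollama offline, wrong URL, firewall)"
    else:
        kind = "generic error message with the raw exception text"
    print(f"{type(exc).__name__}: {kind}")
```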