Rulga committed on
Commit
a1d9e9c
·
1 Parent(s): 7d50823

Implement structured logging throughout the application; replace print statements with logger calls for improved error tracking and debugging

Browse files
Files changed (1) hide show
  1. app.py +52 -45
app.py CHANGED
@@ -3,6 +3,9 @@ import os
3
  import json
4
  import datetime
5
  from pathlib import Path
 
 
 
6
  from langdetect import detect # новый импорт
7
  from huggingface_hub import InferenceClient, HfApi
8
  from config.constants import DEFAULT_SYSTEM_MESSAGE
@@ -32,8 +35,13 @@ from web.evaluation_interface import (
32
  generate_evaluation_report_html,
33
  export_training_data_action
34
  )
35
- from src.analytics.chat_evaluator import ChatEvaluator
36
- import sys
 
 
 
 
 
37
 
38
  if not HF_TOKEN:
39
  raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
@@ -52,7 +60,7 @@ chat_evaluator = ChatEvaluator(
52
  chat_history_path=CHAT_HISTORY_PATH
53
  )
54
 
55
- print(f"Chat histories will be saved to: {CHAT_HISTORY_PATH}")
56
 
57
  def load_user_preferences():
58
  """Load user preferences from file"""
@@ -65,7 +73,7 @@ def load_user_preferences():
65
  "parameters": {}
66
  }
67
  except Exception as e:
68
- print(f"Error loading user preferences: {str(e)}")
69
  return {
70
  "selected_model": DEFAULT_MODEL,
71
  "parameters": {}
@@ -87,10 +95,10 @@ def save_user_preferences(model_key, parameters=None):
87
  with open(USER_PREFERENCES_PATH, 'w') as f:
88
  json.dump(preferences, f, indent=2)
89
 
90
- print(f"User preferences saved successfully!")
91
  return True
92
  except Exception as e:
93
- print(f"Error saving user preferences: {str(e)}")
94
  return False
95
 
96
  def initialize_client(model_id=None):
@@ -122,10 +130,10 @@ def switch_to_model(model_key):
122
  token=HF_TOKEN
123
  )
124
 
125
- print(f"Switched to model: {model_key}")
126
  return True
127
  except Exception as e:
128
- print(f"Error switching to model {model_key}: {str(e)}")
129
  return False
130
 
131
  def get_fallback_model(current_model):
@@ -139,12 +147,12 @@ def get_context(message, conversation_id):
139
  """Get context from knowledge base"""
140
  vector_store = load_vector_store()
141
  if vector_store is None:
142
- print("Knowledge base not found or failed to load")
143
  return ""
144
 
145
  # Check if vector_store is a string (error message) instead of an actual store
146
  if isinstance(vector_store, str):
147
- print(f"Error with vector store: {vector_store}")
148
  return ""
149
 
150
  try:
@@ -153,11 +161,11 @@ def get_context(message, conversation_id):
153
  context_docs = vector_store.similarity_search(message, k=2)
154
 
155
  # Add debug logging
156
- print(f"\nDebug - Query: {message}")
157
  for i, doc in enumerate(context_docs):
158
- print(f"\nDebug - Context {i+1}:")
159
- print(f"Source: {doc.metadata.get('source', 'unknown')}")
160
- print(f"Content: {doc.page_content[:200]}...")
161
 
162
  # Limit each fragment to 300 characters to reduce context dominance
163
  context_text = "\n\n".join([f"Context from {doc.metadata.get('source', 'unknown')}: {doc.page_content[:300]}..." for doc in context_docs])
@@ -170,7 +178,7 @@ def get_context(message, conversation_id):
170
 
171
  return context_text
172
  except Exception as e:
173
- print(f"Error getting context: {str(e)}")
174
  return ""
175
 
176
  def post_process_response(user_message, bot_response):
@@ -180,11 +188,11 @@ def post_process_response(user_message, bot_response):
180
  user_lang = detect_language(user_message)
181
  bot_lang = detect_language(bot_response)
182
 
183
- print(f"Debug - User language: {user_lang}, Bot response language: {bot_lang}")
184
 
185
  # If languages don't match and response is long enough to detect
186
  if user_lang != bot_lang and len(bot_response.strip()) > 20:
187
- print(f"Debug - Language mismatch detected! User: {user_lang}, Bot: {bot_lang}")
188
 
189
  # Add language mismatch warning
190
  warning = f"⚠️ [Language mismatch detected. Response should be in {user_lang}]\n\n"
@@ -192,33 +200,33 @@ def post_process_response(user_message, bot_response):
192
 
193
  return bot_response
194
  except Exception as e:
195
- print(f"Error in post_process_response: {str(e)}")
196
- return bot_response # Return original response in case of error
197
 
198
  def load_vector_store():
199
  """Load knowledge base from dataset"""
200
  try:
201
  from src.knowledge_base.dataset import DatasetManager
202
 
203
- print("Debug - Attempting to load vector store...")
204
  dataset = DatasetManager()
205
  success, result = dataset.download_vector_store()
206
 
207
- print(f"Debug - Download result: success={success}, result_type={type(result)}")
208
 
209
  if success:
210
  if isinstance(result, str):
211
- print(f"Debug - Error message received: {result}")
212
  return None
213
  return result
214
  else:
215
- print(f"Debug - Failed to load vector store: {result}")
216
  return None
217
 
218
  except Exception as e:
219
  import traceback
220
- print(f"Exception loading knowledge base: {str(e)}")
221
- print(traceback.format_exc())
222
  return None
223
 
224
  def detect_language(text):
@@ -229,12 +237,11 @@ def detect_language(text):
229
 
230
  # Minimum text length for reliable detection - reduced to 5 characters
231
  if len(cleaned_text) < 5:
232
- print(f"Text too short for reliable detection: '{cleaned_text}'")
233
- # Try to detect anyway for short texts instead of defaulting to English
234
  try:
235
  return detect(cleaned_text)
236
  except:
237
- return "en" # Default only if detection fails
238
 
239
  lang = detect(cleaned_text)
240
 
@@ -252,14 +259,14 @@ def detect_language(text):
252
 
253
  # Log detection result
254
  if lang not in supported_langs:
255
- print(f"Detected uncommon language: {lang} for text: '{cleaned_text[:50]}...'")
256
 
257
  # Return detected language even if not in supported list
258
  return lang
259
 
260
  except Exception as e:
261
- print(f"Language detection error: {str(e)} for text: '{text[:50]}...'")
262
- return "en" # Default to English only on error
263
 
264
  def respond(
265
  message,
@@ -275,7 +282,7 @@ def respond(
275
  try:
276
  # Determine user language
277
  user_lang = detect_language(message)
278
- print(f"Debug - Detected user language: {user_lang}")
279
 
280
  # Add language instruction at the end of system message to increase its importance
281
  language_instruction = f"\nIMPORTANT: You MUST respond in {user_lang} language ONLY."
@@ -309,7 +316,7 @@ def respond(
309
  return new_history, conversation_id
310
 
311
  except Exception as e:
312
- print(f"API Error: {str(e)}")
313
  error_msg = format_friendly_error(str(e))
314
 
315
  # --- Format Error Response ---
@@ -354,9 +361,9 @@ def log_api_error(user_message, error_message, model_id, is_fallback=False):
354
  f.write(f"Error: {error_message}\n")
355
  f.write(f"Fallback attempt: {is_fallback}\n")
356
 
357
- print(f"API error logged to {log_path}")
358
  except Exception as e:
359
- print(f"Failed to log API error: {str(e)}")
360
 
361
  def update_kb():
362
  """Function to update existing knowledge base with new documents"""
@@ -407,7 +414,7 @@ def save_chat_history(history, conversation_id):
407
  with open(filepath, 'w', encoding='utf-8') as f:
408
  json.dump(chat_data, f, ensure_ascii=False, indent=2)
409
 
410
- print(f"Debug - Chat history saved locally to {filepath}")
411
 
412
  # Now upload to HuggingFace dataset
413
  try:
@@ -428,15 +435,15 @@ def save_chat_history(history, conversation_id):
428
  repo_type="dataset"
429
  )
430
 
431
- print(f"Debug - Chat history uploaded to dataset at {target_path}")
432
 
433
  except Exception as e:
434
- print(f"Warning - Failed to upload chat history to dataset: {str(e)}")
435
  # Continue execution even if upload fails
436
 
437
  return True
438
  except Exception as e:
439
- print(f"Error saving chat history: {str(e)}")
440
  return False
441
 
442
  def respond_and_clear(message, history, conversation_id):
@@ -467,7 +474,7 @@ def respond_and_clear(message, history, conversation_id):
467
  return new_history, new_conv_id, "" # Clear input
468
 
469
  except Exception as e:
470
- print(f"Error in respond_and_clear: {str(e)}")
471
 
472
  # Create safe error response
473
  error_history = [
@@ -675,7 +682,7 @@ def initialize_app():
675
  token=HF_TOKEN
676
  )
677
 
678
- print(f"App initialized with model: {ACTIVE_MODEL['name']}")
679
  return selected_model
680
 
681
  def initialize_chat_evaluator():
@@ -692,12 +699,12 @@ def initialize_chat_evaluator():
692
  os.makedirs(CHAT_HISTORY_PATH, exist_ok=True)
693
  os.makedirs(os.path.join(CHAT_HISTORY_PATH, 'evaluations'), exist_ok=True)
694
 
695
- print(f"Debug - Chat history path: {CHAT_HISTORY_PATH}")
696
- print(f"Debug - Number of chat files: {len(os.listdir(CHAT_HISTORY_PATH))}")
697
 
698
  return evaluator
699
  except Exception as e:
700
- print(f"Error initializing chat evaluator: {str(e)}")
701
  raise
702
 
703
  # Initialize HF client with token at startup
@@ -1062,6 +1069,6 @@ if __name__ == "__main__":
1062
 
1063
  # Check knowledge base availability in dataset
1064
  if not load_vector_store():
1065
- print("Knowledge base not found. Please create it through the interface.")
1066
 
1067
  demo.launch(share=True)
 
3
  import json
4
  import datetime
5
  from pathlib import Path
6
+ from src.analytics.chat_evaluator import ChatEvaluator
7
+ import sys
8
+ import logging
9
  from langdetect import detect # новый импорт
10
  from huggingface_hub import InferenceClient, HfApi
11
  from config.constants import DEFAULT_SYSTEM_MESSAGE
 
35
  generate_evaluation_report_html,
36
  export_training_data_action
37
  )
38
+
39
+ # Setup logging
40
+ logging.basicConfig(
41
+ level=logging.INFO,
42
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
43
+ )
44
+ logger = logging.getLogger(__name__)
45
 
46
  if not HF_TOKEN:
47
  raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
 
60
  chat_history_path=CHAT_HISTORY_PATH
61
  )
62
 
63
+ logger.info(f"Chat histories will be saved to: {CHAT_HISTORY_PATH}")
64
 
65
  def load_user_preferences():
66
  """Load user preferences from file"""
 
73
  "parameters": {}
74
  }
75
  except Exception as e:
76
+ logger.error(f"Error loading user preferences: {str(e)}")
77
  return {
78
  "selected_model": DEFAULT_MODEL,
79
  "parameters": {}
 
95
  with open(USER_PREFERENCES_PATH, 'w') as f:
96
  json.dump(preferences, f, indent=2)
97
 
98
+ logger.info("User preferences saved successfully!")
99
  return True
100
  except Exception as e:
101
+ logger.error(f"Error saving user preferences: {str(e)}")
102
  return False
103
 
104
  def initialize_client(model_id=None):
 
130
  token=HF_TOKEN
131
  )
132
 
133
+ logger.info(f"Switched to model: {model_key}")
134
  return True
135
  except Exception as e:
136
+ logger.error(f"Error switching to model {model_key}: {str(e)}")
137
  return False
138
 
139
  def get_fallback_model(current_model):
 
147
  """Get context from knowledge base"""
148
  vector_store = load_vector_store()
149
  if vector_store is None:
150
+ logger.warning("Knowledge base not found or failed to load")
151
  return ""
152
 
153
  # Check if vector_store is a string (error message) instead of an actual store
154
  if isinstance(vector_store, str):
155
+ logger.error(f"Error with vector store: {vector_store}")
156
  return ""
157
 
158
  try:
 
161
  context_docs = vector_store.similarity_search(message, k=2)
162
 
163
  # Add debug logging
164
+ logger.debug(f"Query: {message}")
165
  for i, doc in enumerate(context_docs):
166
+ logger.debug(f"Context {i+1}:")
167
+ logger.debug(f"Source: {doc.metadata.get('source', 'unknown')}")
168
+ logger.debug(f"Content: {doc.page_content[:200]}...")
169
 
170
  # Limit each fragment to 300 characters to reduce context dominance
171
  context_text = "\n\n".join([f"Context from {doc.metadata.get('source', 'unknown')}: {doc.page_content[:300]}..." for doc in context_docs])
 
178
 
179
  return context_text
180
  except Exception as e:
181
+ logger.error(f"Error getting context: {str(e)}")
182
  return ""
183
 
184
  def post_process_response(user_message, bot_response):
 
188
  user_lang = detect_language(user_message)
189
  bot_lang = detect_language(bot_response)
190
 
191
+ logger.debug(f"User language: {user_lang}, Bot response language: {bot_lang}")
192
 
193
  # If languages don't match and response is long enough to detect
194
  if user_lang != bot_lang and len(bot_response.strip()) > 20:
195
+ logger.warning(f"Language mismatch detected! User: {user_lang}, Bot: {bot_lang}")
196
 
197
  # Add language mismatch warning
198
  warning = f"⚠️ [Language mismatch detected. Response should be in {user_lang}]\n\n"
 
200
 
201
  return bot_response
202
  except Exception as e:
203
+ logger.error(f"Error in post_process_response: {str(e)}")
204
+ return bot_response
205
 
206
  def load_vector_store():
207
  """Load knowledge base from dataset"""
208
  try:
209
  from src.knowledge_base.dataset import DatasetManager
210
 
211
+ logger.debug("Attempting to load vector store...")
212
  dataset = DatasetManager()
213
  success, result = dataset.download_vector_store()
214
 
215
+ logger.debug(f"Download result: success={success}, result_type={type(result)}")
216
 
217
  if success:
218
  if isinstance(result, str):
219
+ logger.debug(f"Error message received: {result}")
220
  return None
221
  return result
222
  else:
223
+ logger.error(f"Failed to load vector store: {result}")
224
  return None
225
 
226
  except Exception as e:
227
  import traceback
228
+ logger.error(f"Exception loading knowledge base: {str(e)}")
229
+ logger.error(traceback.format_exc())
230
  return None
231
 
232
  def detect_language(text):
 
237
 
238
  # Minimum text length for reliable detection - reduced to 5 characters
239
  if len(cleaned_text) < 5:
240
+ logger.debug(f"Text too short for reliable detection: '{cleaned_text}'")
 
241
  try:
242
  return detect(cleaned_text)
243
  except:
244
+ return "en"
245
 
246
  lang = detect(cleaned_text)
247
 
 
259
 
260
  # Log detection result
261
  if lang not in supported_langs:
262
+ logger.warning(f"Detected uncommon language: {lang} for text: '{cleaned_text[:50]}...'")
263
 
264
  # Return detected language even if not in supported list
265
  return lang
266
 
267
  except Exception as e:
268
+ logger.error(f"Language detection error: {str(e)} for text: '{text[:50]}...'")
269
+ return "en"
270
 
271
  def respond(
272
  message,
 
282
  try:
283
  # Determine user language
284
  user_lang = detect_language(message)
285
+ logger.debug(f"Detected user language: {user_lang}")
286
 
287
  # Add language instruction at the end of system message to increase its importance
288
  language_instruction = f"\nIMPORTANT: You MUST respond in {user_lang} language ONLY."
 
316
  return new_history, conversation_id
317
 
318
  except Exception as e:
319
+ logger.error(f"API Error: {str(e)}")
320
  error_msg = format_friendly_error(str(e))
321
 
322
  # --- Format Error Response ---
 
361
  f.write(f"Error: {error_message}\n")
362
  f.write(f"Fallback attempt: {is_fallback}\n")
363
 
364
+ logger.info(f"API error logged to {log_path}")
365
  except Exception as e:
366
+ logger.error(f"Failed to log API error: {str(e)}")
367
 
368
  def update_kb():
369
  """Function to update existing knowledge base with new documents"""
 
414
  with open(filepath, 'w', encoding='utf-8') as f:
415
  json.dump(chat_data, f, ensure_ascii=False, indent=2)
416
 
417
+ logger.debug(f"Chat history saved locally to {filepath}")
418
 
419
  # Now upload to HuggingFace dataset
420
  try:
 
435
  repo_type="dataset"
436
  )
437
 
438
+ logger.debug(f"Chat history uploaded to dataset at {target_path}")
439
 
440
  except Exception as e:
441
+ logger.warning(f"Failed to upload chat history to dataset: {str(e)}")
442
  # Continue execution even if upload fails
443
 
444
  return True
445
  except Exception as e:
446
+ logger.error(f"Error saving chat history: {str(e)}")
447
  return False
448
 
449
  def respond_and_clear(message, history, conversation_id):
 
474
  return new_history, new_conv_id, "" # Clear input
475
 
476
  except Exception as e:
477
+ logger.error(f"Error in respond_and_clear: {str(e)}")
478
 
479
  # Create safe error response
480
  error_history = [
 
682
  token=HF_TOKEN
683
  )
684
 
685
+ logger.info(f"App initialized with model: {ACTIVE_MODEL['name']}")
686
  return selected_model
687
 
688
  def initialize_chat_evaluator():
 
699
  os.makedirs(CHAT_HISTORY_PATH, exist_ok=True)
700
  os.makedirs(os.path.join(CHAT_HISTORY_PATH, 'evaluations'), exist_ok=True)
701
 
702
+ logger.debug(f"Chat history path: {CHAT_HISTORY_PATH}")
703
+ logger.debug(f"Number of chat files: {len(os.listdir(CHAT_HISTORY_PATH))}")
704
 
705
  return evaluator
706
  except Exception as e:
707
+ logger.error(f"Error initializing chat evaluator: {str(e)}")
708
  raise
709
 
710
  # Initialize HF client with token at startup
 
1069
 
1070
  # Check knowledge base availability in dataset
1071
  if not load_vector_store():
1072
+ logger.warning("Knowledge base not found. Please create it through the interface.")
1073
 
1074
  demo.launch(share=True)