Julian Vanecek committed on
Commit
942ca5c
·
1 Parent(s): 285457c

more debugging comments

Browse files
backend/chatbot_backend.py CHANGED
@@ -77,6 +77,46 @@ class ChatbotBackend:
77
  input_cost = (input_tokens / 1_000_000) * input_cost_per_million
78
  return f"\n💭 Generating response... ({input_tokens:,} tokens, ~${input_cost:.4f})\n\n"
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def format_context_for_display(self, version_results: List[Dict], general_results: List[Dict],
81
  product: str, version: str, max_length: int = 500) -> str:
82
  """Format context chunks for display with truncation."""
@@ -121,13 +161,21 @@ class ChatbotBackend:
121
 
122
  def query_with_version(self, query: str, product: str, version: str,
123
  custom_prompt: Optional[str] = None,
124
- model: str = "gpt-4o",
125
- temperature: float = 0.7,
126
- max_tokens: int = 4000) -> Generator[Dict, None, None]:
127
  """
128
  Query the chatbot with automatic version-specific and general context.
129
  Yields streaming responses.
130
  """
 
 
 
 
 
 
 
 
131
  start_time = time.time()
132
 
133
  # Yield status update for RAG retrieval
@@ -253,7 +301,11 @@ class ChatbotBackend:
253
  tool_name = tool_call["function"]["name"]
254
  tool_args = json.loads(tool_call["function"]["arguments"])
255
 
256
- if tool_name == "search_vector_store":
 
 
 
 
257
  status_msg = f"\n🔍 Searching {tool_args.get('vector_store_name', 'vector store')} for: {tool_args.get('query', '')}...\n"
258
  elif tool_name == "read_document":
259
  status_msg = f"\n📄 Reading document: {tool_args.get('page_id', '')}...\n"
@@ -267,7 +319,20 @@ class ChatbotBackend:
267
  }
268
 
269
  # Execute tool calls
270
- tool_results = self._execute_tool_calls(tool_calls)
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
  # Continue conversation with tool results
273
  messages.append({
@@ -352,6 +417,62 @@ class ChatbotBackend:
352
  "done": True
353
  }
354
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[Dict]:
356
  """Execute tool calls and return results."""
357
  results = []
 
77
  input_cost = (input_tokens / 1_000_000) * input_cost_per_million
78
  return f"\n💭 Generating response... ({input_tokens:,} tokens, ~${input_cost:.4f})\n\n"
79
 
80
+ # DEBUG METHOD: Remove in production - formats vector search chunks for display
81
+ def _format_vector_search_display(self, result: Dict, max_length: int = 500) -> str:
82
+ """Format vector search results for display. DEBUG ONLY - REMOVE IN PRODUCTION."""
83
+ formatted = ["📄 Retrieved context chunks:\n```"]
84
+
85
+ if "version" in result and result.get("results"):
86
+ # Single version search
87
+ formatted.append(f"=== {result['version']} ===")
88
+ max_display = self.config.get("max_chunks_to_display", 5)
89
+ for i, chunk in enumerate(result["results"][:max_display], 1):
90
+ text = chunk.get('quote', chunk.get('text', ''))
91
+ file_id = chunk.get('file_id', 'Unknown')
92
+ similarity = chunk.get('similarity', 0)
93
+
94
+ # Truncate long texts
95
+ if len(text) > max_length:
96
+ text = text[:max_length] + "..."
97
+
98
+ formatted.append(f"\n[Chunk {i} - Source: {file_id} | Similarity: {similarity:.3f}]")
99
+ formatted.append(text)
100
+
101
+ elif isinstance(result.get("results"), dict):
102
+ # Multi-version search
103
+ for version, version_results in result["results"].items():
104
+ formatted.append(f"\n=== {version} ===")
105
+ for i, chunk in enumerate(version_results[:3], 1):
106
+ text = chunk.get('quote', chunk.get('text', ''))
107
+ file_id = chunk.get('file_id', 'Unknown')
108
+ similarity = chunk.get('similarity', 0)
109
+
110
+ # Truncate long texts
111
+ if len(text) > max_length:
112
+ text = text[:max_length] + "..."
113
+
114
+ formatted.append(f"\n[Chunk {i} - Source: {file_id} | Similarity: {similarity:.3f}]")
115
+ formatted.append(text)
116
+
117
+ formatted.append("```\n")
118
+ return "\n".join(formatted)
119
+
120
  def format_context_for_display(self, version_results: List[Dict], general_results: List[Dict],
121
  product: str, version: str, max_length: int = 500) -> str:
122
  """Format context chunks for display with truncation."""
 
161
 
162
  def query_with_version(self, query: str, product: str, version: str,
163
  custom_prompt: Optional[str] = None,
164
+ model: Optional[str] = None,
165
+ temperature: Optional[float] = None,
166
+ max_tokens: Optional[int] = None) -> Generator[Dict, None, None]:
167
  """
168
  Query the chatbot with automatic version-specific and general context.
169
  Yields streaming responses.
170
  """
171
+ # Use config defaults if not provided
172
+ if model is None:
173
+ model = self.config.get("default_model", "gpt-4.1-2025-04-14")
174
+ if temperature is None:
175
+ temperature = self.config.get("default_temperature", 0)
176
+ if max_tokens is None:
177
+ max_tokens = self.config.get("default_max_tokens", 4000)
178
+
179
  start_time = time.time()
180
 
181
  # Yield status update for RAG retrieval
 
301
  tool_name = tool_call["function"]["name"]
302
  tool_args = json.loads(tool_call["function"]["arguments"])
303
 
304
+ if tool_name == "vector_search":
305
+ versions = tool_args.get('versions', [])
306
+ query = tool_args.get('query', '')
307
+ status_msg = f"\n🔍 Vector searching {', '.join(versions)} for: '{query}'...\n"
308
+ elif tool_name == "search_vector_store":
309
  status_msg = f"\n🔍 Searching {tool_args.get('vector_store_name', 'vector store')} for: {tool_args.get('query', '')}...\n"
310
  elif tool_name == "read_document":
311
  status_msg = f"\n📄 Reading document: {tool_args.get('page_id', '')}...\n"
 
319
  }
320
 
321
  # Execute tool calls
322
+ # DEBUG: In production, replace this entire block with:
323
+ # tool_results = self._execute_tool_calls(tool_calls)
324
+ tool_results = []
325
+ for result in self._execute_tool_calls_with_display(tool_calls):
326
+ if result.get("display"):
327
+ # Yield display content
328
+ yield {
329
+ "type": "content",
330
+ "content": result["display"],
331
+ "done": False
332
+ }
333
+ else:
334
+ # Collect tool result
335
+ tool_results.append(result)
336
 
337
  # Continue conversation with tool results
338
  messages.append({
 
417
  "done": True
418
  }
419
 
420
+ # DEBUG METHOD: Remove in production - adds display output for vector search results
421
+ def _execute_tool_calls_with_display(self, tool_calls: List[Dict]) -> Generator[Dict, None, None]:
422
+ """Execute tool calls and yield results with optional display content. DEBUG ONLY - REMOVE IN PRODUCTION."""
423
+ for tool_call in tool_calls:
424
+ function_name = tool_call["function"]["name"]
425
+ arguments = json.loads(tool_call["function"]["arguments"])
426
+
427
+ if function_name == "vector_search":
428
+ result = execute_vector_search(
429
+ self.vector_store_manager,
430
+ arguments["query"],
431
+ arguments["versions"],
432
+ arguments.get("max_results_per_version", 5)
433
+ )
434
+
435
+ # Yield display content for vector search results
436
+ if result["status"] == "success" and result.get("results"):
437
+ display_content = self._format_vector_search_display(result)
438
+ yield {"display": display_content}
439
+
440
+ content = format_search_results_for_context(result)
441
+
442
+ # Legacy support for old tool names
443
+ elif function_name in ["search_vector_store", "search_multiple_versions"]:
444
+ # Convert old format to new format
445
+ if function_name == "search_vector_store":
446
+ versions = [arguments.get("vector_store_name")]
447
+ max_results = arguments.get("max_results", 5)
448
+ else:
449
+ versions = arguments.get("versions", [])
450
+ max_results = arguments.get("max_results_per_version", 3)
451
+
452
+ result = execute_vector_search(
453
+ self.vector_store_manager,
454
+ arguments["query"],
455
+ versions,
456
+ max_results
457
+ )
458
+ content = format_search_results_for_context(result)
459
+
460
+ elif function_name == "read_document_pages":
461
+ result = execute_document_read(
462
+ self.document_reader,
463
+ arguments["document_name"],
464
+ arguments.get("page_numbers")
465
+ )
466
+ content = format_document_content_for_context(result)
467
+
468
+ else:
469
+ content = f"Unknown function: {function_name}"
470
+
471
+ yield {
472
+ "tool_call_id": tool_call["id"],
473
+ "content": content
474
+ }
475
+
476
  def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[Dict]:
477
  """Execute tool calls and return results."""
478
  results = []
backend/embeddings.py CHANGED
@@ -48,6 +48,8 @@ class EmbeddingSearch:
48
  def search_chunks(self, query: str, chunks: List[Dict], top_k: int = 5) -> List[Dict]:
49
  """Search chunks using embedding similarity."""
50
  # Generate query embedding
 
 
51
  logger.info(f"Searching for: {query}")
52
  query_embedding = self.generate_embedding(query)
53
 
 
48
  def search_chunks(self, query: str, chunks: List[Dict], top_k: int = 5) -> List[Dict]:
49
  """Search chunks using embedding similarity."""
50
  # Generate query embedding
51
+ # DEBUG: Remove this line in production - shows exact query being vectorized
52
+ logger.info(f"🔍 VECTORIZING QUERY STRING: '{query}'")
53
  logger.info(f"Searching for: {query}")
54
  query_embedding = self.generate_embedding(query)
55
 
config/config.json CHANGED
@@ -30,8 +30,8 @@
30
  }
31
  },
32
  "default_model": "gpt-4.1-2025-04-14",
33
- "temperature": 0.7,
34
- "max_completion_tokens": 4000,
35
  "chunk_size": 1000,
36
  "chunk_overlap": 200,
37
  "max_chunks_to_retrieve": 7,
 
30
  }
31
  },
32
  "default_model": "gpt-4.1-2025-04-14",
33
+ "default_temperature": 0,
34
+ "default_max_tokens": 4000,
35
  "chunk_size": 1000,
36
  "chunk_overlap": 200,
37
  "max_chunks_to_retrieve": 7,
frontend/gradio_app.py CHANGED
@@ -198,7 +198,7 @@ class GradioApp:
198
  temperature = gr.Slider(
199
  minimum=0,
200
  maximum=1,
201
- value=0.7,
202
  step=0.1,
203
  label="Temperature",
204
  info="Higher = more creative, Lower = more focused"
@@ -207,7 +207,7 @@ class GradioApp:
207
  max_tokens = gr.Slider(
208
  minimum=100,
209
  maximum=8000,
210
- value=4000,
211
  step=100,
212
  label="Max Tokens",
213
  info="Maximum response length"
 
198
  temperature = gr.Slider(
199
  minimum=0,
200
  maximum=1,
201
+ value=self.chatbot.config.get("default_temperature", 0),
202
  step=0.1,
203
  label="Temperature",
204
  info="Higher = more creative, Lower = more focused"
 
207
  max_tokens = gr.Slider(
208
  minimum=100,
209
  maximum=8000,
210
+ value=self.chatbot.config.get("default_max_tokens", 4000),
211
  step=100,
212
  label="Max Tokens",
213
  info="Maximum response length"