Spaces:
Sleeping
Sleeping
Julian Vanecek committed on
Commit ·
942ca5c
1
Parent(s): 285457c
more debugging comments
Browse files- backend/chatbot_backend.py +126 -5
- backend/embeddings.py +2 -0
- config/config.json +2 -2
- frontend/gradio_app.py +2 -2
backend/chatbot_backend.py
CHANGED
|
@@ -77,6 +77,46 @@ class ChatbotBackend:
|
|
| 77 |
input_cost = (input_tokens / 1_000_000) * input_cost_per_million
|
| 78 |
return f"\n💭 Generating response... ({input_tokens:,} tokens, ~${input_cost:.4f})\n\n"
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
def format_context_for_display(self, version_results: List[Dict], general_results: List[Dict],
|
| 81 |
product: str, version: str, max_length: int = 500) -> str:
|
| 82 |
"""Format context chunks for display with truncation."""
|
|
@@ -121,13 +161,21 @@ class ChatbotBackend:
|
|
| 121 |
|
| 122 |
def query_with_version(self, query: str, product: str, version: str,
|
| 123 |
custom_prompt: Optional[str] = None,
|
| 124 |
-
model: str =
|
| 125 |
-
temperature: float =
|
| 126 |
-
max_tokens: int =
|
| 127 |
"""
|
| 128 |
Query the chatbot with automatic version-specific and general context.
|
| 129 |
Yields streaming responses.
|
| 130 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
start_time = time.time()
|
| 132 |
|
| 133 |
# Yield status update for RAG retrieval
|
|
@@ -253,7 +301,11 @@ class ChatbotBackend:
|
|
| 253 |
tool_name = tool_call["function"]["name"]
|
| 254 |
tool_args = json.loads(tool_call["function"]["arguments"])
|
| 255 |
|
| 256 |
-
if tool_name == "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
status_msg = f"\n🔍 Searching {tool_args.get('vector_store_name', 'vector store')} for: {tool_args.get('query', '')}...\n"
|
| 258 |
elif tool_name == "read_document":
|
| 259 |
status_msg = f"\n📄 Reading document: {tool_args.get('page_id', '')}...\n"
|
|
@@ -267,7 +319,20 @@ class ChatbotBackend:
|
|
| 267 |
}
|
| 268 |
|
| 269 |
# Execute tool calls
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
# Continue conversation with tool results
|
| 273 |
messages.append({
|
|
@@ -352,6 +417,62 @@ class ChatbotBackend:
|
|
| 352 |
"done": True
|
| 353 |
}
|
| 354 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[Dict]:
|
| 356 |
"""Execute tool calls and return results."""
|
| 357 |
results = []
|
|
|
|
| 77 |
input_cost = (input_tokens / 1_000_000) * input_cost_per_million
|
| 78 |
return f"\n💭 Generating response... ({input_tokens:,} tokens, ~${input_cost:.4f})\n\n"
|
| 79 |
|
| 80 |
+
# DEBUG METHOD: Remove in production - formats vector search chunks for display
|
| 81 |
+
def _format_vector_search_display(self, result: Dict, max_length: int = 500) -> str:
    """Render vector-search hits as a fenced text block for on-screen debugging.

    DEBUG ONLY - REMOVE IN PRODUCTION.

    Args:
        result: Search result mapping. ``result["results"]`` is either a list
            of chunk dicts (single-version search, labelled with
            ``result["version"]``) or a dict mapping a version label to a
            list of chunk dicts (multi-version search). Each chunk is read
            through its ``quote``/``text``, ``file_id`` and ``similarity``
            keys.
        max_length: Chunk text longer than this many characters is cut and
            suffixed with ``"..."``.

    Returns:
        The header line, one ``=== version ===`` section per version with its
        chunks, and a closing code fence, joined with newlines. When
        ``result`` matches neither shape only the header and fence are
        returned.
    """
    lines = ["📄 Retrieved context chunks:\n```"]

    def append_chunks(chunks, limit):
        # Shared renderer so both search shapes format chunks identically.
        for i, chunk in enumerate((chunks or [])[:limit], 1):
            # `or` (rather than a .get default) also covers keys that are
            # present but hold None, which would otherwise break len() and
            # the :.3f format below.
            text = chunk.get('quote') or chunk.get('text') or ''
            file_id = chunk.get('file_id', 'Unknown')
            similarity = chunk.get('similarity') or 0

            # Truncate long texts
            if len(text) > max_length:
                text = text[:max_length] + "..."

            lines.append(f"\n[Chunk {i} - Source: {file_id} | Similarity: {similarity:.3f}]")
            lines.append(text)

    results = result.get("results")
    if isinstance(results, dict):
        # Multi-version search. Checked first so that a payload which also
        # carries a "version" key is not sliced as if it were a list.
        for version, version_results in results.items():
            lines.append(f"\n=== {version} ===")
            append_chunks(version_results, 3)
    elif "version" in result and results:
        # Single version search
        lines.append(f"=== {result['version']} ===")
        append_chunks(results, self.config.get("max_chunks_to_display", 5))

    lines.append("```\n")
    return "\n".join(lines)
|
| 119 |
+
|
| 120 |
def format_context_for_display(self, version_results: List[Dict], general_results: List[Dict],
|
| 121 |
product: str, version: str, max_length: int = 500) -> str:
|
| 122 |
"""Format context chunks for display with truncation."""
|
|
|
|
| 161 |
|
| 162 |
def query_with_version(self, query: str, product: str, version: str,
|
| 163 |
custom_prompt: Optional[str] = None,
|
| 164 |
+
model: Optional[str] = None,
|
| 165 |
+
temperature: Optional[float] = None,
|
| 166 |
+
max_tokens: Optional[int] = None) -> Generator[Dict, None, None]:
|
| 167 |
"""
|
| 168 |
Query the chatbot with automatic version-specific and general context.
|
| 169 |
Yields streaming responses.
|
| 170 |
"""
|
| 171 |
+
# Use config defaults if not provided
|
| 172 |
+
if model is None:
|
| 173 |
+
model = self.config.get("default_model", "gpt-4.1-2025-04-14")
|
| 174 |
+
if temperature is None:
|
| 175 |
+
temperature = self.config.get("default_temperature", 0)
|
| 176 |
+
if max_tokens is None:
|
| 177 |
+
max_tokens = self.config.get("default_max_tokens", 4000)
|
| 178 |
+
|
| 179 |
start_time = time.time()
|
| 180 |
|
| 181 |
# Yield status update for RAG retrieval
|
|
|
|
| 301 |
tool_name = tool_call["function"]["name"]
|
| 302 |
tool_args = json.loads(tool_call["function"]["arguments"])
|
| 303 |
|
| 304 |
+
if tool_name == "vector_search":
|
| 305 |
+
versions = tool_args.get('versions', [])
|
| 306 |
+
query = tool_args.get('query', '')
|
| 307 |
+
status_msg = f"\n🔍 Vector searching {', '.join(versions)} for: '{query}'...\n"
|
| 308 |
+
elif tool_name == "search_vector_store":
|
| 309 |
status_msg = f"\n🔍 Searching {tool_args.get('vector_store_name', 'vector store')} for: {tool_args.get('query', '')}...\n"
|
| 310 |
elif tool_name == "read_document":
|
| 311 |
status_msg = f"\n📄 Reading document: {tool_args.get('page_id', '')}...\n"
|
|
|
|
| 319 |
}
|
| 320 |
|
| 321 |
# Execute tool calls
|
| 322 |
+
# DEBUG: In production, replace this entire block with:
|
| 323 |
+
# tool_results = self._execute_tool_calls(tool_calls)
|
| 324 |
+
tool_results = []
|
| 325 |
+
for result in self._execute_tool_calls_with_display(tool_calls):
|
| 326 |
+
if result.get("display"):
|
| 327 |
+
# Yield display content
|
| 328 |
+
yield {
|
| 329 |
+
"type": "content",
|
| 330 |
+
"content": result["display"],
|
| 331 |
+
"done": False
|
| 332 |
+
}
|
| 333 |
+
else:
|
| 334 |
+
# Collect tool result
|
| 335 |
+
tool_results.append(result)
|
| 336 |
|
| 337 |
# Continue conversation with tool results
|
| 338 |
messages.append({
|
|
|
|
| 417 |
"done": True
|
| 418 |
}
|
| 419 |
|
| 420 |
+
# DEBUG METHOD: Remove in production - adds display output for vector search results
|
| 421 |
+
def _execute_tool_calls_with_display(self, tool_calls: List[Dict]) -> Generator[Dict, None, None]:
    """Execute tool calls, yielding display snippets and tool results.

    DEBUG ONLY - REMOVE IN PRODUCTION.

    Two kinds of dict are yielded:

    * ``{"display": str}`` - formatted chunk preview, emitted only for a
      ``vector_search`` call whose result has status ``"success"`` and
      non-empty ``results``. It is yielded before that call's tool result.
    * ``{"tool_call_id": ..., "content": ...}`` - exactly one per tool call,
      carrying the text to feed back to the model.

    Args:
        tool_calls: Tool-call dicts; each is read through ``["id"]``,
            ``["function"]["name"]`` and ``["function"]["arguments"]`` (a
            JSON-encoded string).

    Raises:
        json.JSONDecodeError: if a non-empty ``arguments`` string is not
            valid JSON.
        KeyError: if a required argument (e.g. ``query``) is missing.
    """
    for tool_call in tool_calls:
        function_name = tool_call["function"]["name"]
        # An empty or missing payload means "no arguments"; json.loads("")
        # would raise and abort the generator before this call gets a result.
        arguments = json.loads(tool_call["function"]["arguments"] or "{}")

        if function_name == "vector_search":
            result = execute_vector_search(
                self.vector_store_manager,
                arguments["query"],
                arguments["versions"],
                arguments.get("max_results_per_version", 5)
            )

            # Yield display content for vector search results.
            # .get() so a result without a status key is simply not displayed.
            if result.get("status") == "success" and result.get("results"):
                display_content = self._format_vector_search_display(result)
                yield {"display": display_content}

            content = format_search_results_for_context(result)

        # Legacy support for old tool names
        elif function_name in ["search_vector_store", "search_multiple_versions"]:
            # Convert old format to new format
            if function_name == "search_vector_store":
                # NOTE(review): yields [None] when vector_store_name is
                # absent - confirm execute_vector_search tolerates that.
                versions = [arguments.get("vector_store_name")]
                max_results = arguments.get("max_results", 5)
            else:
                versions = arguments.get("versions", [])
                max_results = arguments.get("max_results_per_version", 3)

            result = execute_vector_search(
                self.vector_store_manager,
                arguments["query"],
                versions,
                max_results
            )
            content = format_search_results_for_context(result)

        elif function_name == "read_document_pages":
            result = execute_document_read(
                self.document_reader,
                arguments["document_name"],
                arguments.get("page_numbers")
            )
            content = format_document_content_for_context(result)

        else:
            content = f"Unknown function: {function_name}"

        yield {
            "tool_call_id": tool_call["id"],
            "content": content
        }
|
| 475 |
+
|
| 476 |
def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[Dict]:
|
| 477 |
"""Execute tool calls and return results."""
|
| 478 |
results = []
|
backend/embeddings.py
CHANGED
|
@@ -48,6 +48,8 @@ class EmbeddingSearch:
|
|
| 48 |
def search_chunks(self, query: str, chunks: List[Dict], top_k: int = 5) -> List[Dict]:
|
| 49 |
"""Search chunks using embedding similarity."""
|
| 50 |
# Generate query embedding
|
|
|
|
|
|
|
| 51 |
logger.info(f"Searching for: {query}")
|
| 52 |
query_embedding = self.generate_embedding(query)
|
| 53 |
|
|
|
|
| 48 |
def search_chunks(self, query: str, chunks: List[Dict], top_k: int = 5) -> List[Dict]:
|
| 49 |
"""Search chunks using embedding similarity."""
|
| 50 |
# Generate query embedding
|
| 51 |
+
# DEBUG: Remove this line in production - shows exact query being vectorized
|
| 52 |
+
logger.info(f"🔍 VECTORIZING QUERY STRING: '{query}'")
|
| 53 |
logger.info(f"Searching for: {query}")
|
| 54 |
query_embedding = self.generate_embedding(query)
|
| 55 |
|
config/config.json
CHANGED
|
@@ -30,8 +30,8 @@
|
|
| 30 |
}
|
| 31 |
},
|
| 32 |
"default_model": "gpt-4.1-2025-04-14",
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
"chunk_size": 1000,
|
| 36 |
"chunk_overlap": 200,
|
| 37 |
"max_chunks_to_retrieve": 7,
|
|
|
|
| 30 |
}
|
| 31 |
},
|
| 32 |
"default_model": "gpt-4.1-2025-04-14",
|
| 33 |
+
"default_temperature": 0,
|
| 34 |
+
"default_max_tokens": 4000,
|
| 35 |
"chunk_size": 1000,
|
| 36 |
"chunk_overlap": 200,
|
| 37 |
"max_chunks_to_retrieve": 7,
|
frontend/gradio_app.py
CHANGED
|
@@ -198,7 +198,7 @@ class GradioApp:
|
|
| 198 |
temperature = gr.Slider(
|
| 199 |
minimum=0,
|
| 200 |
maximum=1,
|
| 201 |
-
value=
|
| 202 |
step=0.1,
|
| 203 |
label="Temperature",
|
| 204 |
info="Higher = more creative, Lower = more focused"
|
|
@@ -207,7 +207,7 @@ class GradioApp:
|
|
| 207 |
max_tokens = gr.Slider(
|
| 208 |
minimum=100,
|
| 209 |
maximum=8000,
|
| 210 |
-
value=4000,
|
| 211 |
step=100,
|
| 212 |
label="Max Tokens",
|
| 213 |
info="Maximum response length"
|
|
|
|
| 198 |
temperature = gr.Slider(
|
| 199 |
minimum=0,
|
| 200 |
maximum=1,
|
| 201 |
+
value=self.chatbot.config.get("default_temperature", 0),
|
| 202 |
step=0.1,
|
| 203 |
label="Temperature",
|
| 204 |
info="Higher = more creative, Lower = more focused"
|
|
|
|
| 207 |
max_tokens = gr.Slider(
|
| 208 |
minimum=100,
|
| 209 |
maximum=8000,
|
| 210 |
+
value=self.chatbot.config.get("default_max_tokens", 4000),
|
| 211 |
step=100,
|
| 212 |
label="Max Tokens",
|
| 213 |
info="Maximum response length"
|