Spaces:

bitsinthesky
/

openai-chatbot-mcp

Sleeping

App Files Files Community

Julian Vanecek committed on Jul 9, 2025

Commit

942ca5c

1 Parent(s): 285457c

more debugging comments

Browse files

Files changed (4) hide show

backend/chatbot_backend.py +126 -5
backend/embeddings.py +2 -0
config/config.json +2 -2
frontend/gradio_app.py +2 -2

backend/chatbot_backend.py CHANGED Viewed

@@ -77,6 +77,46 @@ class ChatbotBackend:
         input_cost = (input_tokens / 1_000_000) * input_cost_per_million
         return f"\n💭 Generating response... ({input_tokens:,} tokens, ~${input_cost:.4f})\n\n"
     def format_context_for_display(self, version_results: List[Dict], general_results: List[Dict],
                                  product: str, version: str, max_length: int = 500) -> str:
         """Format context chunks for display with truncation."""
@@ -121,13 +161,21 @@ class ChatbotBackend:
     def query_with_version(self, query: str, product: str, version: str,
                          custom_prompt: Optional[str] = None,
-                         model: str = "gpt-4o",
-                         temperature: float = 0.7,
-                         max_tokens: int = 4000) -> Generator[Dict, None, None]:
         """
         Query the chatbot with automatic version-specific and general context.
         Yields streaming responses.
         """
         start_time = time.time()
         # Yield status update for RAG retrieval
@@ -253,7 +301,11 @@ class ChatbotBackend:
                         tool_name = tool_call["function"]["name"]
                         tool_args = json.loads(tool_call["function"]["arguments"])
-                        if tool_name == "search_vector_store":
                             status_msg = f"\n🔍 Searching {tool_args.get('vector_store_name', 'vector store')} for: {tool_args.get('query', '')}...\n"
                         elif tool_name == "read_document":
                             status_msg = f"\n📄 Reading document: {tool_args.get('page_id', '')}...\n"
@@ -267,7 +319,20 @@ class ChatbotBackend:
                         }
                     # Execute tool calls
-                    tool_results = self._execute_tool_calls(tool_calls)
                     # Continue conversation with tool results
                     messages.append({
@@ -352,6 +417,62 @@ class ChatbotBackend:
                 "done": True
             }
     def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[Dict]:
         """Execute tool calls and return results."""
         results = []

         input_cost = (input_tokens / 1_000_000) * input_cost_per_million
         return f"\n💭 Generating response... ({input_tokens:,} tokens, ~${input_cost:.4f})\n\n"
+    # DEBUG METHOD: Remove in production - formats vector search chunks for display
+    def _format_vector_search_display(self, result: Dict, max_length: int = 500) -> str:
+        """Format vector search results for display. DEBUG ONLY - REMOVE IN PRODUCTION.
+
+        Renders the chunks found in ``result["results"]`` inside a fenced block.
+        A dict of results is treated as a multi-version search (version ->
+        chunks, first 3 chunks per version). Otherwise, when ``result`` has a
+        "version" key, the results are treated as a single-version list capped
+        by the ``max_chunks_to_display`` config value (default 5).
+
+        Args:
+            result: Search result dictionary to render.
+            max_length: Maximum characters shown per chunk before "..." is appended.
+
+        Returns:
+            The formatted display text; only the header and fence when there is
+            nothing to show.
+        """
+        def render_chunks(chunks, limit: int) -> List[str]:
+            """Return the display lines for the first ``limit`` chunks."""
+            lines = []
+            for i, chunk in enumerate((chunks or [])[:limit], 1):
+                raw_text = chunk.get('quote', chunk.get('text', ''))
+                # A key that is present but None (or non-string) must not crash len()/slicing.
+                text = '' if raw_text is None else str(raw_text)
+                file_id = chunk.get('file_id', 'Unknown')
+                try:
+                    similarity = f"{float(chunk.get('similarity', 0)):.3f}"
+                except (TypeError, ValueError):
+                    # Missing/non-numeric score: show a placeholder instead of raising.
+                    similarity = "n/a"
+                # Truncate long texts
+                if len(text) > max_length:
+                    text = text[:max_length] + "..."
+                lines.append(f"\n[Chunk {i} - Source: {file_id} | Similarity: {similarity}]")
+                lines.append(text)
+            return lines
+        formatted = ["📄 Retrieved context chunks:\n```"]
+        results = result.get("results")
+        # Check the dict shape first so a multi-version payload that also carries
+        # a "version" key is not sliced like a list.
+        if isinstance(results, dict):
+            # Multi-version search
+            for version, version_results in results.items():
+                formatted.append(f"\n=== {version} ===")
+                formatted.extend(render_chunks(version_results, 3))
+        elif "version" in result and results:
+            # Single version search
+            formatted.append(f"=== {result['version']} ===")
+            max_display = self.config.get("max_chunks_to_display", 5)
+            formatted.extend(render_chunks(results, max_display))
+        formatted.append("```\n")
+        return "\n".join(formatted)
     def format_context_for_display(self, version_results: List[Dict], general_results: List[Dict],
                                  product: str, version: str, max_length: int = 500) -> str:
         """Format context chunks for display with truncation."""
     def query_with_version(self, query: str, product: str, version: str,
                          custom_prompt: Optional[str] = None,
+                         model: Optional[str] = None,
+                         temperature: Optional[float] = None,
+                         max_tokens: Optional[int] = None) -> Generator[Dict, None, None]:
         """
         Query the chatbot with automatic version-specific and general context.
         Yields streaming responses.
         """
+        # Use config defaults if not provided
+        if model is None:
+            model = self.config.get("default_model", "gpt-4.1-2025-04-14")
+        if temperature is None:
+            temperature = self.config.get("default_temperature", 0)
+        if max_tokens is None:
+            max_tokens = self.config.get("default_max_tokens", 4000)
         start_time = time.time()
         # Yield status update for RAG retrieval
                         tool_name = tool_call["function"]["name"]
                         tool_args = json.loads(tool_call["function"]["arguments"])
+                        if tool_name == "vector_search":
+                            versions = tool_args.get('versions', [])
+                            query = tool_args.get('query', '')
+                            status_msg = f"\n🔍 Vector searching {', '.join(versions)} for: '{query}'...\n"
+                        elif tool_name == "search_vector_store":
                             status_msg = f"\n🔍 Searching {tool_args.get('vector_store_name', 'vector store')} for: {tool_args.get('query', '')}...\n"
                         elif tool_name == "read_document":
                             status_msg = f"\n📄 Reading document: {tool_args.get('page_id', '')}...\n"
                         }
                     # Execute tool calls
+                    # DEBUG: In production, replace this entire block with:
+                    # tool_results = self._execute_tool_calls(tool_calls)
+                    tool_results = []
+                    for result in self._execute_tool_calls_with_display(tool_calls):
+                        if result.get("display"):
+                            # Yield display content
+                            yield {
+                                "type": "content",
+                                "content": result["display"],
+                                "done": False
+                            }
+                        else:
+                            # Collect tool result
+                            tool_results.append(result)
                     # Continue conversation with tool results
                     messages.append({
                 "done": True
             }
+    # DEBUG METHOD: Remove in production - adds display output for vector search results
+    def _execute_tool_calls_with_display(self, tool_calls: List[Dict]) -> Generator[Dict, None, None]:
+        """Execute tool calls and yield results with optional display content. DEBUG ONLY - REMOVE IN PRODUCTION.
+
+        For each tool call this yields a ``{"tool_call_id", "content"}`` dict.
+        A successful, non-empty "vector_search" additionally yields a
+        ``{"display": ...}`` dict first. Malformed JSON or missing required
+        arguments are reported in ``content`` instead of raising, so every
+        tool call still produces a result.
+
+        Args:
+            tool_calls: Tool call dicts carrying "id" and "function"
+                ("name" plus "arguments" as a JSON string).
+
+        Yields:
+            Display dicts and tool result dicts, in tool-call order.
+        """
+        # Arguments each known tool reads with a hard lookup.
+        required_args = {
+            "vector_search": ("query", "versions"),
+            "search_vector_store": ("query",),
+            "search_multiple_versions": ("query",),
+            "read_document_pages": ("document_name",),
+        }
+        for tool_call in tool_calls:
+            function_name = tool_call["function"]["name"]
+            tool_call_id = tool_call["id"]
+            # Arguments are model-generated text, so they may not be valid JSON.
+            try:
+                arguments = json.loads(tool_call["function"]["arguments"])
+            except (TypeError, json.JSONDecodeError) as exc:
+                yield {
+                    "tool_call_id": tool_call_id,
+                    "content": f"Invalid JSON arguments for {function_name}: {exc}"
+                }
+                continue
+            if not isinstance(arguments, dict):
+                yield {
+                    "tool_call_id": tool_call_id,
+                    "content": f"Invalid JSON arguments for {function_name}: expected an object"
+                }
+                continue
+            missing = [name for name in required_args.get(function_name, ()) if name not in arguments]
+            if missing:
+                yield {
+                    "tool_call_id": tool_call_id,
+                    "content": f"Missing required argument(s) for {function_name}: {', '.join(missing)}"
+                }
+                continue
+            if function_name == "vector_search":
+                result = execute_vector_search(
+                    self.vector_store_manager,
+                    arguments["query"],
+                    arguments["versions"],
+                    arguments.get("max_results_per_version", 5)
+                )
+                # Yield display content for vector search results
+                if result.get("status") == "success" and result.get("results"):
+                    display_content = self._format_vector_search_display(result)
+                    yield {"display": display_content}
+                content = format_search_results_for_context(result)
+            # Legacy support for old tool names
+            elif function_name in ["search_vector_store", "search_multiple_versions"]:
+                # Convert old format to new format
+                if function_name == "search_vector_store":
+                    # NOTE(review): yields [None] when "vector_store_name" is absent;
+                    # confirm how execute_vector_search treats that.
+                    versions = [arguments.get("vector_store_name")]
+                    max_results = arguments.get("max_results", 5)
+                else:
+                    versions = arguments.get("versions", [])
+                    max_results = arguments.get("max_results_per_version", 3)
+                result = execute_vector_search(
+                    self.vector_store_manager,
+                    arguments["query"],
+                    versions,
+                    max_results
+                )
+                content = format_search_results_for_context(result)
+            elif function_name == "read_document_pages":
+                result = execute_document_read(
+                    self.document_reader,
+                    arguments["document_name"],
+                    arguments.get("page_numbers")
+                )
+                content = format_document_content_for_context(result)
+            else:
+                content = f"Unknown function: {function_name}"
+            yield {
+                "tool_call_id": tool_call_id,
+                "content": content
+            }
     def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[Dict]:
         """Execute tool calls and return results."""
         results = []

backend/embeddings.py CHANGED Viewed

@@ -48,6 +48,8 @@ class EmbeddingSearch:
     def search_chunks(self, query: str, chunks: List[Dict], top_k: int = 5) -> List[Dict]:
         """Search chunks using embedding similarity."""
         # Generate query embedding
         logger.info(f"Searching for: {query}")
         query_embedding = self.generate_embedding(query)

     def search_chunks(self, query: str, chunks: List[Dict], top_k: int = 5) -> List[Dict]:
         """Search chunks using embedding similarity."""
         # Generate query embedding
+        # DEBUG: Remove this line in production - shows exact query being vectorized
+        logger.info(f"🔍 VECTORIZING QUERY STRING: '{query}'")
         logger.info(f"Searching for: {query}")
         query_embedding = self.generate_embedding(query)

config/config.json CHANGED Viewed

@@ -30,8 +30,8 @@
     }
   },
   "default_model": "gpt-4.1-2025-04-14",
-  "temperature": 0.7,
-  "max_completion_tokens": 4000,
   "chunk_size": 1000,
   "chunk_overlap": 200,
   "max_chunks_to_retrieve": 7,

     }
   },
   "default_model": "gpt-4.1-2025-04-14",
+  "default_temperature": 0,
+  "default_max_tokens": 4000,
   "chunk_size": 1000,
   "chunk_overlap": 200,
   "max_chunks_to_retrieve": 7,

frontend/gradio_app.py CHANGED Viewed

@@ -198,7 +198,7 @@ class GradioApp:
                         temperature = gr.Slider(
                             minimum=0,
                             maximum=1,
-                            value=0.7,
                             step=0.1,
                             label="Temperature",
                             info="Higher = more creative, Lower = more focused"
@@ -207,7 +207,7 @@ class GradioApp:
                         max_tokens = gr.Slider(
                             minimum=100,
                             maximum=8000,
-                            value=4000,
                             step=100,
                             label="Max Tokens",
                             info="Maximum response length"

                         temperature = gr.Slider(
                             minimum=0,
                             maximum=1,
+                            value=self.chatbot.config.get("default_temperature", 0),
                             step=0.1,
                             label="Temperature",
                             info="Higher = more creative, Lower = more focused"
                         max_tokens = gr.Slider(
                             minimum=100,
                             maximum=8000,
+                            value=self.chatbot.config.get("default_max_tokens", 4000),
                             step=100,
                             label="Max Tokens",
                             info="Maximum response length"