Spaces:

Asish22
/

code-crawler

Running

Asish Karthikeya Gogineni committed on Jan 29

Commit

1e94a5a

1 Parent(s): 986715f

feat: Inject File Tree context for holistic code understanding

- Added _generate_file_tree_str to ChatEngine
- Injected full project file list into system prompt
- RAG now provides both top-30 relevant code blocks AND the complete file structure
- Follow-up to improve 'Antigravity-like' awareness

Files changed (1) hide show

code_chatbot/rag.py +23 -2

code_chatbot/rag.py CHANGED Viewed

@@ -363,6 +363,24 @@ class ChatEngine:
             return f"Error: {str(e)}", []
     def _linear_chat(self, question: str) -> Tuple[str, List[dict]]:
     def _prepare_chat_context(self, question: str):
         """Prepare messages and sources for chat/stream."""
         # 1. Retrieve relevant documents
@@ -379,7 +397,6 @@ class ChatEngine:
             return None, [], ""
         # Build context from documents - Use FULL content, not truncated
-        # Gemini 1.5/2.0 can handle 1M+ tokens, so we should provide as much context as possible.
         context_parts = []
         for doc in docs[:30]: # Use top 30 documents
             file_path = doc.metadata.get('file_path', 'unknown')
@@ -388,6 +405,10 @@ class ChatEngine:
         context_text = "\n\n".join(context_parts)
         # Extract sources
         sources = []
         for doc in docs[:30]:
@@ -402,7 +423,7 @@ class ChatEngine:
         base_prompt = get_prompt_for_provider("linear_rag", self.provider)
         qa_system_prompt = base_prompt.format(
             repo_name=self.repo_name,
-            context=context_text
         )
         # Build messages with history

             return f"Error: {str(e)}", []
     def _linear_chat(self, question: str) -> Tuple[str, List[dict]]:
+    def _generate_file_tree_str(self):
+        """Generate a string representation of the file tree."""
+        if not self.repo_files:
+            return ""
+        # Generate simple list of relative paths
+        paths = set()
+        for f in self.repo_files:
+            # Clean path
+            if self.repo_dir and f.startswith(self.repo_dir):
+                rel = os.path.relpath(f, self.repo_dir)
+            else:
+                rel = f
+            paths.add(rel)
+        tree_str = "Project Structure (File Tree):\n" + "\n".join(sorted(list(paths)))
+        return tree_str
     def _prepare_chat_context(self, question: str):
         """Prepare messages and sources for chat/stream."""
         # 1. Retrieve relevant documents
             return None, [], ""
         # Build context from documents - Use FULL content, not truncated
         context_parts = []
         for doc in docs[:30]: # Use top 30 documents
             file_path = doc.metadata.get('file_path', 'unknown')
         context_text = "\n\n".join(context_parts)
+        # Inject File Tree into context
+        file_tree = self._generate_file_tree_str()
+        full_context = f"{file_tree}\n\nRETRIEVED CONTEXT:\n{context_text}"
         # Extract sources
         sources = []
         for doc in docs[:30]:
         base_prompt = get_prompt_for_provider("linear_rag", self.provider)
         qa_system_prompt = base_prompt.format(
             repo_name=self.repo_name,
+            context=full_context
         )
         # Build messages with history