Asish Karthikeya Gogineni committed on
Commit
1e94a5a
·
1 Parent(s): 986715f

feat: Inject File Tree context for holistic code understanding

Browse files

- Added _generate_file_tree_str to ChatEngine
- Injected full project file list into system prompt
- RAG now provides both top-30 relevant code blocks AND the complete file structure
- Follow-up to improve 'Antigravity-like' awareness

Files changed (1) hide show
  1. code_chatbot/rag.py +23 -2
code_chatbot/rag.py CHANGED
@@ -363,6 +363,24 @@ class ChatEngine:
363
  return f"Error: {str(e)}", []
364
 
365
  def _linear_chat(self, question: str) -> Tuple[str, List[dict]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  def _prepare_chat_context(self, question: str):
367
  """Prepare messages and sources for chat/stream."""
368
  # 1. Retrieve relevant documents
@@ -379,7 +397,6 @@ class ChatEngine:
379
  return None, [], ""
380
 
381
  # Build context from documents - Use FULL content, not truncated
382
- # Gemini 1.5/2.0 can handle 1M+ tokens, so we should provide as much context as possible.
383
  context_parts = []
384
  for doc in docs[:30]: # Use top 30 documents
385
  file_path = doc.metadata.get('file_path', 'unknown')
@@ -388,6 +405,10 @@ class ChatEngine:
388
 
389
  context_text = "\n\n".join(context_parts)
390
 
 
 
 
 
391
  # Extract sources
392
  sources = []
393
  for doc in docs[:30]:
@@ -402,7 +423,7 @@ class ChatEngine:
402
  base_prompt = get_prompt_for_provider("linear_rag", self.provider)
403
  qa_system_prompt = base_prompt.format(
404
  repo_name=self.repo_name,
405
- context=context_text
406
  )
407
 
408
  # Build messages with history
 
363
  return f"Error: {str(e)}", []
364
 
365
  def _linear_chat(self, question: str) -> Tuple[str, List[dict]]:
366
def _generate_file_tree_str(self):
    """Return the project's file list as a newline-separated string.

    Reads ``self.repo_files`` (an iterable of path strings) and
    ``self.repo_dir`` (the repository root, possibly empty/None).  Paths
    located inside ``repo_dir`` are shown relative to it; all other paths
    are kept exactly as given.  Duplicates are removed and the result is
    sorted.

    Returns:
        ``""`` when ``self.repo_files`` is empty; otherwise a header line
        ``"Project Structure (File Tree):"`` followed by one path per line.
    """
    if not self.repo_files:
        return ""

    root = os.path.normpath(self.repo_dir) if self.repo_dir else None
    # Match on a path-component boundary: a plain ``startswith(root)`` would
    # also accept siblings such as "/tmp/repo_backup" for root "/tmp/repo"
    # and turn them into misleading "../repo_backup/..." entries.
    prefix = root.rstrip(os.sep) + os.sep if root else None

    paths = set()
    for f in self.repo_files:
        if root is not None:
            normalized = os.path.normpath(f)
            if normalized == root or normalized.startswith(prefix):
                paths.add(os.path.relpath(normalized, root))
                continue
        # Outside the repo root (or no root known): keep the path untouched.
        paths.add(f)

    return "Project Structure (File Tree):\n" + "\n".join(sorted(paths))
383
+
384
  def _prepare_chat_context(self, question: str):
385
  """Prepare messages and sources for chat/stream."""
386
  # 1. Retrieve relevant documents
 
397
  return None, [], ""
398
 
399
  # Build context from documents - Use FULL content, not truncated
 
400
  context_parts = []
401
  for doc in docs[:30]: # Use top 30 documents
402
  file_path = doc.metadata.get('file_path', 'unknown')
 
405
 
406
  context_text = "\n\n".join(context_parts)
407
 
408
+ # Inject File Tree into context
409
+ file_tree = self._generate_file_tree_str()
410
+ full_context = f"{file_tree}\n\nRETRIEVED CONTEXT:\n{context_text}"
411
+
412
  # Extract sources
413
  sources = []
414
  for doc in docs[:30]:
 
423
  base_prompt = get_prompt_for_provider("linear_rag", self.provider)
424
  qa_system_prompt = base_prompt.format(
425
  repo_name=self.repo_name,
426
+ context=full_context
427
  )
428
 
429
  # Build messages with history