Spaces:
Running
Running
Asish Karthikeya Gogineni committed on
Commit ·
1e94a5a
1
Parent(s): 986715f
feat: Inject File Tree context for holistic code understanding
Browse files
- Added _generate_file_tree_str to ChatEngine
- Injected full project file list into system prompt
- RAG now provides both top-30 relevant code blocks AND the complete file structure
- Follow-up to improve 'Antigravity-like' awareness
- code_chatbot/rag.py +23 -2
code_chatbot/rag.py
CHANGED
|
@@ -363,6 +363,24 @@ class ChatEngine:
|
|
| 363 |
return f"Error: {str(e)}", []
|
| 364 |
|
| 365 |
def _linear_chat(self, question: str) -> Tuple[str, List[dict]]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
def _prepare_chat_context(self, question: str):
|
| 367 |
"""Prepare messages and sources for chat/stream."""
|
| 368 |
# 1. Retrieve relevant documents
|
|
@@ -379,7 +397,6 @@ class ChatEngine:
|
|
| 379 |
return None, [], ""
|
| 380 |
|
| 381 |
# Build context from documents - Use FULL content, not truncated
|
| 382 |
-
# Gemini 1.5/2.0 can handle 1M+ tokens, so we should provide as much context as possible.
|
| 383 |
context_parts = []
|
| 384 |
for doc in docs[:30]: # Use top 30 documents
|
| 385 |
file_path = doc.metadata.get('file_path', 'unknown')
|
|
@@ -388,6 +405,10 @@ class ChatEngine:
|
|
| 388 |
|
| 389 |
context_text = "\n\n".join(context_parts)
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
# Extract sources
|
| 392 |
sources = []
|
| 393 |
for doc in docs[:30]:
|
|
@@ -402,7 +423,7 @@ class ChatEngine:
|
|
| 402 |
base_prompt = get_prompt_for_provider("linear_rag", self.provider)
|
| 403 |
qa_system_prompt = base_prompt.format(
|
| 404 |
repo_name=self.repo_name,
|
| 405 |
-
context=
|
| 406 |
)
|
| 407 |
|
| 408 |
# Build messages with history
|
|
|
|
| 363 |
return f"Error: {str(e)}", []
|
| 364 |
|
| 365 |
def _linear_chat(self, question: str) -> Tuple[str, List[dict]]:
|
| 366 |
+
def _generate_file_tree_str(self) -> str:
    """Return a sorted, de-duplicated listing of the repository's files.

    Reads ``self.repo_files`` and ``self.repo_dir``. A path that lies under
    ``self.repo_dir`` is rewritten relative to it; any other path is listed
    unchanged.

    Returns:
        An empty string when ``self.repo_files`` is empty or unset.
        Otherwise the header line ``Project Structure (File Tree):``
        followed by one path per line, sorted lexicographically.
    """
    if not self.repo_files:
        return ""

    root = self.repo_dir
    # os.altsep is None on POSIX; keep only the separators that exist here.
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)

    def _is_under_root(path):
        # NOTE(review): assumes entries of repo_files are plain strings,
        # as the original startswith() call did -- confirm against caller.
        if not root or not path.startswith(root):
            return False
        # A bare prefix test is not enough: "/a/repo2/x.py" starts with
        # "/a/repo" but lives in a sibling directory, and relpath() would
        # render it as "../repo2/x.py". Require the match to end on a
        # path-component boundary.
        remainder = path[len(root):]
        return (
            not remainder
            or root.endswith(separators)
            or remainder.startswith(separators)
        )

    # A set comprehension collapses duplicate entries before sorting.
    paths = {
        os.path.relpath(f, root) if _is_under_root(f) else f
        for f in self.repo_files
    }
    return "Project Structure (File Tree):\n" + "\n".join(sorted(paths))
|
| 383 |
+
|
| 384 |
def _prepare_chat_context(self, question: str):
|
| 385 |
"""Prepare messages and sources for chat/stream."""
|
| 386 |
# 1. Retrieve relevant documents
|
|
|
|
| 397 |
return None, [], ""
|
| 398 |
|
| 399 |
# Build context from documents - Use FULL content, not truncated
|
|
|
|
| 400 |
context_parts = []
|
| 401 |
for doc in docs[:30]: # Use top 30 documents
|
| 402 |
file_path = doc.metadata.get('file_path', 'unknown')
|
|
|
|
| 405 |
|
| 406 |
context_text = "\n\n".join(context_parts)
|
| 407 |
|
| 408 |
+
# Inject File Tree into context
|
| 409 |
+
file_tree = self._generate_file_tree_str()
|
| 410 |
+
full_context = f"{file_tree}\n\nRETRIEVED CONTEXT:\n{context_text}"
|
| 411 |
+
|
| 412 |
# Extract sources
|
| 413 |
sources = []
|
| 414 |
for doc in docs[:30]:
|
|
|
|
| 423 |
base_prompt = get_prompt_for_provider("linear_rag", self.provider)
|
| 424 |
qa_system_prompt = base_prompt.format(
|
| 425 |
repo_name=self.repo_name,
|
| 426 |
+
context=full_context
|
| 427 |
)
|
| 428 |
|
| 429 |
# Build messages with history
|