KrishnaCosmic committed on
Commit
8ff58c6
·
1 Parent(s): 07355a1

Fix: RAG auto-fetches README, remove unused chat_history

Browse files
Files changed (3) hide show
  1. main.py +2 -1
  2. models/__init__.py +2 -3
  3. services/rag_chatbot_service.py +42 -2
main.py CHANGED
@@ -232,7 +232,8 @@ async def rag_chat(request: RAGChatRequest):
232
  result = await rag_chatbot_service.answer_question(
233
  question=request.question,
234
  repo_name=request.repo_name,
235
- top_k=request.top_k
 
236
  )
237
  return result
238
  except Exception as e:
 
232
  result = await rag_chatbot_service.answer_question(
233
  question=request.question,
234
  repo_name=request.repo_name,
235
+ top_k=request.top_k,
236
+ github_access_token=request.github_access_token
237
  )
238
  return result
239
  except Exception as e:
models/__init__.py CHANGED
@@ -2,7 +2,7 @@ from .user import User, UserRole
2
  from .repository import Repository
3
  from .issue import Issue
4
  from .triage import IssueTriageData, Template, Classification, Sentiment
5
- from .chat import ChatHistory
6
 
7
  __all__ = [
8
  'User',
@@ -12,6 +12,5 @@ __all__ = [
12
  'IssueTriageData',
13
  'Template',
14
  'Classification',
15
- 'Sentiment',
16
- 'ChatHistory'
17
  ]
 
2
  from .repository import Repository
3
  from .issue import Issue
4
  from .triage import IssueTriageData, Template, Classification, Sentiment
5
+ # ChatHistory removed - not used (AI chat uses ephemeral history)
6
 
7
  __all__ = [
8
  'User',
 
12
  'IssueTriageData',
13
  'Template',
14
  'Classification',
15
+ 'Sentiment'
 
16
  ]
services/rag_chatbot_service.py CHANGED
@@ -42,7 +42,8 @@ class RAGChatbotService:
42
  self,
43
  question: str,
44
  repo_name: Optional[str] = None,
45
- top_k: int = 5
 
46
  ) -> RAGAnswer:
47
  """
48
  Answer a question using RAG.
@@ -51,16 +52,55 @@ class RAGChatbotService:
51
  question: The question to answer
52
  repo_name: Optional repo context
53
  top_k: Number of documents to retrieve
 
54
 
55
  Returns:
56
  RAGAnswer with the response and sources
57
  """
58
- # Search for relevant documents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  relevant_docs = await self.search_documents(question, repo_name, top_k)
60
 
61
  # Build context from documents
62
  context = self._build_context(relevant_docs)
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # Generate answer using AI
65
  answer, confidence = await self._generate_answer(question, context, repo_name)
66
 
 
42
  self,
43
  question: str,
44
  repo_name: Optional[str] = None,
45
+ top_k: int = 5,
46
+ github_access_token: Optional[str] = None
47
  ) -> RAGAnswer:
48
  """
49
  Answer a question using RAG.
 
52
  question: The question to answer
53
  repo_name: Optional repo context
54
  top_k: Number of documents to retrieve
55
+ github_access_token: Optional GitHub token for README fetching
56
 
57
  Returns:
58
  RAGAnswer with the response and sources
59
  """
60
+ from config.database import db
61
+
62
+ # Check if we have any indexed content for this repo
63
+ has_indexed_content = False
64
+ readme_content = None
65
+
66
+ if repo_name:
67
+ # Check for existing RAG chunks
68
+ existing_chunks = await db.rag_chunks.count_documents({"sourceRepo": repo_name})
69
+ has_indexed_content = existing_chunks > 0
70
+
71
+ # If no indexed content, try to fetch README directly from GitHub
72
+ if not has_indexed_content:
73
+ logger.info(f"No indexed content for {repo_name}, fetching README directly...")
74
+ try:
75
+ from services.github_service import github_service
76
+ readme_content = await github_service.fetch_repository_readme(
77
+ repo_name,
78
+ github_access_token
79
+ )
80
+ if readme_content:
81
+ logger.info(f"Fetched README for {repo_name} ({len(readme_content)} chars)")
82
+ except Exception as e:
83
+ logger.warning(f"Could not fetch README for {repo_name}: {e}")
84
+
85
+ # Search for relevant documents (from indexed chunks)
86
  relevant_docs = await self.search_documents(question, repo_name, top_k)
87
 
88
  # Build context from documents
89
  context = self._build_context(relevant_docs)
90
 
91
+ # If we have a fresh README but no indexed content, prepend it to context
92
+ if readme_content and not has_indexed_content:
93
+ # Truncate README if too long (keep first 4000 chars)
94
+ truncated_readme = readme_content[:4000] if len(readme_content) > 4000 else readme_content
95
+ context = f"[PROJECT README]\n{truncated_readme}\n\n---\n\n{context}"
96
+ # Add README to sources
97
+ relevant_docs.insert(0, {
98
+ "id": f"{repo_name}_readme_live",
99
+ "title": "Project README (Live)",
100
+ "type": "readme",
101
+ "relevance": 1.0
102
+ })
103
+
104
  # Generate answer using AI
105
  answer, confidence = await self._generate_answer(question, context, repo_name)
106