KrishnaCosmic committed on
Commit
db8ee02
·
1 Parent(s): 7414af1

checking changes

Browse files
Files changed (1) hide show
  1. services/rag_chatbot_service.py +73 -18
services/rag_chatbot_service.py CHANGED
@@ -6,6 +6,7 @@ import logging
6
  from typing import List, Dict, Any, Optional
7
  from datetime import datetime, timezone
8
  from pydantic import BaseModel
 
9
 
10
  from config.settings import settings
11
 
@@ -34,9 +35,53 @@ class RAGChatbotService:
34
  This implementation uses in-memory search as fallback.
35
  """
36
 
 
 
 
37
  def __init__(self):
38
  self.use_vector_db = False # Set True when ChromaDB is available
39
  self._embeddings_cache = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  async def answer_question(
42
  self,
@@ -74,28 +119,38 @@ class RAGChatbotService:
74
 
75
  # If no indexed content, try to fetch README directly from GitHub
76
  if not has_indexed_content:
77
- logger.info(f"No indexed content for {repo_name}, fetching README directly from GitHub...")
78
- try:
79
- # Direct HTTP request to GitHub API
80
- owner, repo = repo_name.split('/')
81
- url = f"https://raw.githubusercontent.com/{owner}/{repo}/main/README.md"
82
-
83
- async with httpx.AsyncClient(timeout=10) as client:
84
- response = await client.get(url)
85
- if response.status_code == 200:
86
- readme_content = response.text
87
- logger.info(f"Successfully fetched README for {repo_name} ({len(readme_content)} chars)")
88
- else:
89
- # Try master branch instead
90
- url = f"https://raw.githubusercontent.com/{owner}/{repo}/master/README.md"
91
  response = await client.get(url)
92
  if response.status_code == 200:
93
  readme_content = response.text
94
- logger.info(f"Successfully fetched README (master) for {repo_name} ({len(readme_content)} chars)")
 
 
95
  else:
96
- logger.warning(f"README not found at {url}")
97
- except Exception as e:
98
- logger.error(f"Error fetching README for {repo_name}: {e}")
 
 
 
 
 
 
 
 
 
99
 
100
  # Search for relevant documents (from indexed chunks)
101
  relevant_docs = await self.search_documents(question, repo_name, top_k)
 
6
  from typing import List, Dict, Any, Optional
7
  from datetime import datetime, timezone
8
  from pydantic import BaseModel
9
+ import time
10
 
11
  from config.settings import settings
12
 
 
35
  This implementation uses in-memory search as fallback.
36
  """
37
 
38
+ # Cache settings
39
+ README_CACHE_TTL = 600 # 10 minutes in seconds
40
+
41
  def __init__(self):
42
  self.use_vector_db = False # Set True when ChromaDB is available
43
  self._embeddings_cache = {}
44
+ self._readme_cache = {} # {repo_key: {"content": str, "timestamp": float}}
45
+
46
def _get_cached_readme(self, repo_name: str) -> Optional[str]:
    """
    Return the cached README for a repository if a fresh entry exists.

    An entry is considered fresh while its age is between 0 and
    ``README_CACHE_TTL`` seconds (inclusive). Stale entries are removed
    from the cache as a side effect of the lookup.

    Args:
        repo_name: Repository name (owner/repo format); used as the cache key

    Returns:
        README content if cached and valid, None otherwise
    """
    # Single lookup instead of a membership test followed by indexing.
    cache_entry = self._readme_cache.get(repo_name)
    if cache_entry is None:
        return None

    age = time.time() - cache_entry["timestamp"]

    # time.time() is wall-clock time and may step backwards (NTP sync,
    # manual adjustment). A negative age means the stored timestamp can no
    # longer be trusted, so the entry is treated as expired instead of being
    # served for longer than the TTL.
    if age < 0 or age > self.README_CACHE_TTL:
        # pop() with a default tolerates the key having been removed already.
        self._readme_cache.pop(repo_name, None)
        logger.info(f"README cache expired for {repo_name} (age: {age:.1f}s)")
        return None

    logger.info(f"✅ Serving README from cache for {repo_name} (age: {age:.1f}s)")
    return cache_entry["content"]
70
+
71
def _cache_readme(self, repo_name: str, content: str) -> None:
    """
    Cache README content for a repository together with the current time.

    Before the new entry is stored, every entry older than
    ``README_CACHE_TTL`` seconds is purged. Without this, an entry is only
    evicted when the same repository is looked up again, so repositories
    that are queried once would stay in memory for the life of the service.

    Args:
        repo_name: Repository name (owner/repo format); used as the cache key
        content: README content to cache
    """
    now = time.time()

    # Collect the stale keys first: deleting from a dict while iterating
    # over it raises RuntimeError.
    expired_keys = [
        key
        for key, entry in self._readme_cache.items()
        if now - entry["timestamp"] > self.README_CACHE_TTL
    ]
    for key in expired_keys:
        del self._readme_cache[key]

    self._readme_cache[repo_name] = {
        "content": content,
        "timestamp": now,
    }
    logger.info(f"📝 Cached README for {repo_name} ({len(content)} chars)")
84
+
85
 
86
  async def answer_question(
87
  self,
 
119
 
120
  # If no indexed content, try to fetch README directly from GitHub
121
  if not has_indexed_content:
122
+ logger.info(f"No indexed content for {repo_name}, checking cache and fetching README if needed...")
123
+
124
+ # Stage 1: Check cache first
125
+ cached_readme = self._get_cached_readme(repo_name)
126
+ if cached_readme:
127
+ readme_content = cached_readme
128
+ else:
129
+ # Cache miss - fetch from GitHub
130
+ try:
131
+ owner, repo = repo_name.split('/')
132
+ url = f"https://raw.githubusercontent.com/{owner}/{repo}/main/README.md"
133
+
134
+ async with httpx.AsyncClient(timeout=10) as client:
 
135
  response = await client.get(url)
136
  if response.status_code == 200:
137
  readme_content = response.text
138
+ # Cache the fetched content
139
+ self._cache_readme(repo_name, readme_content)
140
+ logger.info(f"Successfully fetched README for {repo_name} ({len(readme_content)} chars)")
141
  else:
142
+ # Try master branch instead
143
+ url = f"https://raw.githubusercontent.com/{owner}/{repo}/master/README.md"
144
+ response = await client.get(url)
145
+ if response.status_code == 200:
146
+ readme_content = response.text
147
+ # Cache the fetched content
148
+ self._cache_readme(repo_name, readme_content)
149
+ logger.info(f"Successfully fetched README (master) for {repo_name} ({len(readme_content)} chars)")
150
+ else:
151
+ logger.warning(f"README not found at {url}")
152
+ except Exception as e:
153
+ logger.error(f"Error fetching README for {repo_name}: {e}")
154
 
155
  # Search for relevant documents (from indexed chunks)
156
  relevant_docs = await self.search_documents(question, repo_name, top_k)