KrishnaCosmic committed on
Commit
a8b3a89
·
1 Parent(s): c7355e0

Fix missing await for async AI service calls

Browse files
services/github_service.py CHANGED
@@ -438,6 +438,54 @@ class GitHubService:
438
  except Exception as e:
439
  logger.error(f"README fetch error for {repo_full_name}: {e}")
440
  return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
442
 
443
  # Singleton instance
 
438
  except Exception as e:
439
  logger.error(f"README fetch error for {repo_full_name}: {e}")
440
  return ""
441
+
442
async def fetch_contributing_file(self, repo_full_name: str, github_access_token: Optional[str] = None) -> str:
    """
    Fetch the CONTRIBUTING.md content for a repository.

    Candidate paths are requested in order (CONTRIBUTING.md,
    .github/CONTRIBUTING.md, docs/CONTRIBUTING.md, contributing.md) and the
    body of the first 200 response is returned.

    Args:
        repo_full_name: Repository identifier interpolated into the
            ``/repos/{repo_full_name}/contents/{path}`` URL.
        github_access_token: Optional token; when given it is sent as a
            Bearer ``Authorization`` header.

    Returns:
        The response text of the first candidate that answers 200, or ""
        when no candidate does or when any exception is raised.
    """
    paths_to_try = [
        "CONTRIBUTING.md",
        ".github/CONTRIBUTING.md",
        "docs/CONTRIBUTING.md",
        "contributing.md",
    ]

    # Request the raw media type so response.text is used directly as the
    # file content.
    headers = {"Accept": "application/vnd.github.raw+json"}
    if github_access_token:
        headers["Authorization"] = f"Bearer {github_access_token}"

    try:
        async with httpx.AsyncClient() as client:
            for path in paths_to_try:
                url = f"{self.base_url}/repos/{repo_full_name}/contents/{path}"
                response = await client.get(url, headers=headers, timeout=30.0)

                if response.status_code == 200:
                    return response.text

                # 404 simply means "not at this path". Anything else
                # (auth failure, rate limit, server error) would otherwise
                # be indistinguishable from a missing file, so surface it.
                if response.status_code != 404:
                    logger.warning(
                        f"CONTRIBUTING fetch for {repo_full_name} at {path} "
                        f"returned HTTP {response.status_code}"
                    )

        return ""

    except Exception as e:
        # Best-effort fetch: callers receive "" rather than an exception.
        logger.error(f"CONTRIBUTING fetch error for {repo_full_name}: {e}")
        return ""
472
+
473
async def fetch_repository_docs(
    self,
    repo_full_name: str,
    github_access_token: Optional[str] = None
) -> Dict[str, str]:
    """
    Fetch README and CONTRIBUTING files for RAG indexing.

    The two fetches are independent, so they are awaited concurrently
    instead of one after the other.

    Args:
        repo_full_name: Repository identifier passed to both fetchers.
        github_access_token: Optional token passed to both fetchers.

    Returns:
        Dict with 'readme' and 'contributing' keys holding whatever the
        respective fetchers returned.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    # gather() preserves argument order in its result list.
    readme, contributing = await asyncio.gather(
        self.fetch_repository_readme(repo_full_name, github_access_token),
        self.fetch_contributing_file(repo_full_name, github_access_token),
    )

    return {
        "readme": readme,
        "contributing": contributing
    }
489
 
490
 
491
  # Singleton instance
services/rag_data_prep.py CHANGED
@@ -93,6 +93,53 @@ class RAGDataPrep:
93
 
94
  return text.strip()
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  def _simple_tokenize(self, text: str) -> List[str]:
97
  """
98
  Simple word-based tokenization.
@@ -225,7 +272,7 @@ class RAGDataPrep:
225
  Fetch documents from MongoDB for RAG preparation.
226
 
227
  Args:
228
- doc_types: Types to fetch (issue, pr, comment)
229
  repo_names: Optional filter by repository
230
 
231
  Returns:
@@ -233,29 +280,55 @@ class RAGDataPrep:
233
  """
234
  from config.database import db
235
 
236
- doc_types = doc_types or ["issue", "pr", "comment", "readme"]
237
  documents = []
238
 
239
- if "readme" in doc_types and repo_names:
 
240
  from services.github_service import github_service
241
 
242
  for repo in repo_names:
243
- try:
244
- content = await github_service.fetch_repository_readme(repo, github_access_token)
245
- if content:
246
- documents.append({
247
- "document_id": f"{repo}_readme",
248
- "document_type": "readme",
249
- "source_repo": repo,
250
- "title": "Project README",
251
- "body": content,
252
- "author": "System",
253
- "number": 0,
254
- "state": "active",
255
- "created_at": datetime.now(timezone.utc).isoformat()
256
- })
257
- except Exception as e:
258
- logger.error(f"Failed to fetch README for {repo}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
  if "issue" in doc_types or "pr" in doc_types:
261
  query = {}
 
93
 
94
  return text.strip()
95
 
96
def _detect_priority_sections(self, content: str, doc_type: str) -> str:
    """
    Detect if content contains high-priority sections for contributor context.

    Matching is a case-insensitive substring search over the whole
    document, so a phrase counts wherever it appears (not only in
    headings).

    Args:
        content: Document content. Missing or empty content is treated as
            having no priority sections.
        doc_type: Type of document (readme, contributing)

    Returns:
        Priority level: 'high', 'medium', or 'normal'
    """
    if doc_type == "contributing":
        return "high"  # CONTRIBUTING.md is always high priority

    # Guard: None or "" has nothing to match and must not crash .lower().
    if not content:
        return "normal"

    content_lower = content.lower()
    high_priority_patterns = (
        "getting started",
        "project setup",
        "installation",
        "how to contribute",
        "contributor guidelines",
        "development setup",
        "quick start",
        "for contributors",
        "contributing",
    )

    medium_priority_patterns = (
        "requirements",
        "dependencies",
        "building",
        "testing",
        "documentation",
    )

    # Two or more distinct high-priority phrases -> high.
    high_count = sum(pattern in content_lower for pattern in high_priority_patterns)
    if high_count >= 2:
        return "high"

    # A single high-priority phrase is already enough for medium, so the
    # medium patterns only need scanning when there was no high hit.
    if high_count == 1:
        return "medium"

    medium_count = sum(pattern in content_lower for pattern in medium_priority_patterns)
    if medium_count >= 2:
        return "medium"

    return "normal"
142
+
143
  def _simple_tokenize(self, text: str) -> List[str]:
144
  """
145
  Simple word-based tokenization.
 
272
  Fetch documents from MongoDB for RAG preparation.
273
 
274
  Args:
275
+ doc_types: Types to fetch (issue, pr, comment, readme, contributing)
276
  repo_names: Optional filter by repository
277
 
278
  Returns:
 
280
  """
281
  from config.database import db
282
 
283
+ doc_types = doc_types or ["issue", "pr", "comment", "readme", "contributing"]
284
  documents = []
285
 
286
+ # Fetch README and CONTRIBUTING files with high priority
287
+ if repo_names:
288
  from services.github_service import github_service
289
 
290
  for repo in repo_names:
291
+ # Fetch README
292
+ if "readme" in doc_types:
293
+ try:
294
+ content = await github_service.fetch_repository_readme(repo, github_access_token)
295
+ if content:
296
+ # Extract key sections for high priority tagging
297
+ priority = self._detect_priority_sections(content, "readme")
298
+ documents.append({
299
+ "document_id": f"{repo}_readme",
300
+ "document_type": "readme",
301
+ "source_repo": repo,
302
+ "title": "Project README",
303
+ "body": content,
304
+ "author": "System",
305
+ "number": 0,
306
+ "state": "active",
307
+ "priority": priority,
308
+ "created_at": datetime.now(timezone.utc).isoformat()
309
+ })
310
+ except Exception as e:
311
+ logger.error(f"Failed to fetch README for {repo}: {e}")
312
+
313
+ # Fetch CONTRIBUTING.md (high priority for contributor context)
314
+ if "contributing" in doc_types:
315
+ try:
316
+ content = await github_service.fetch_contributing_file(repo, github_access_token)
317
+ if content:
318
+ documents.append({
319
+ "document_id": f"{repo}_contributing",
320
+ "document_type": "contributing",
321
+ "source_repo": repo,
322
+ "title": "Contributor Guidelines",
323
+ "body": content,
324
+ "author": "System",
325
+ "number": 0,
326
+ "state": "active",
327
+ "priority": "high", # Always high priority
328
+ "created_at": datetime.now(timezone.utc).isoformat()
329
+ })
330
+ except Exception as e:
331
+ logger.error(f"Failed to fetch CONTRIBUTING for {repo}: {e}")
332
 
333
  if "issue" in doc_types or "pr" in doc_types:
334
  query = {}