hashan-7 committed on
Commit
1ea7c95
·
verified ·
1 Parent(s): d74d9aa

update code

Browse files
Files changed (1) hide show
  1. code_retriever.py +45 -4
code_retriever.py CHANGED
@@ -2,7 +2,7 @@ from typing import List
2
 
3
  from github_search import search_github
4
  from stack_search import search_stackoverflow
5
- from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence
6
  from config import settings
7
 
8
 
@@ -30,14 +30,53 @@ def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[Retriev
30
  return unique_items
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
34
  return sorted(
35
  evidence_list,
36
- key=lambda item: (item.score is not None, item.score if item.score is not None else -1),
37
  reverse=True,
38
  )
39
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
42
  if task_type != CodeTaskType.FIX:
43
  return []
@@ -50,11 +89,12 @@ def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> Li
50
  error_message=request.error_message,
51
  language=request.language,
52
  framework=request.framework,
 
53
  max_results=settings.MAX_STACK_RESULTS,
54
  )
55
  collected.extend(stack_results)
56
 
57
- if settings.ENABLE_GITHUB_SEARCH:
58
  github_results = search_github(
59
  message=request.message,
60
  error_message=request.error_message,
@@ -66,5 +106,6 @@ def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> Li
66
 
67
  unique_items = deduplicate_evidence(collected)
68
  ranked_items = sort_evidence(unique_items)
 
69
 
70
- return ranked_items[: settings.MAX_RETRIEVED_ITEMS]
 
2
 
3
  from github_search import search_github
4
  from stack_search import search_stackoverflow
5
+ from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence, SourceType
6
  from config import settings
7
 
8
 
 
30
  return unique_items
31
 
32
 
33
+ def source_priority(source_type: SourceType) -> int:
34
+ if source_type == SourceType.STACKOVERFLOW:
35
+ return 2
36
+ if source_type == SourceType.GITHUB:
37
+ return 1
38
+ return 0
39
+
40
+
41
+ def score_evidence(item: RetrievedEvidence) -> float:
42
+ base = item.score if item.score is not None else 0.0
43
+ return base + source_priority(item.source_type)
44
+
45
+
46
  def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
47
  return sorted(
48
  evidence_list,
49
+ key=lambda item: score_evidence(item),
50
  reverse=True,
51
  )
52
 
53
 
54
+ def trim_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
55
+ stack_items = [item for item in evidence_list if item.source_type == SourceType.STACKOVERFLOW]
56
+ github_items = [item for item in evidence_list if item.source_type == SourceType.GITHUB]
57
+
58
+ stack_items = stack_items[:3]
59
+ github_items = github_items[:2]
60
+
61
+ combined = stack_items + github_items
62
+ combined = sort_evidence(combined)
63
+
64
+ return combined[: settings.MAX_RETRIEVED_ITEMS]
65
+
66
+
67
+ def should_use_github(request: CodeXRequest) -> bool:
68
+ if not settings.ENABLE_GITHUB_SEARCH:
69
+ return False
70
+
71
+ if request.framework and request.framework.strip():
72
+ return True
73
+
74
+ if request.error_message and request.error_message.strip():
75
+ return True
76
+
77
+ return False
78
+
79
+
80
  def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
81
  if task_type != CodeTaskType.FIX:
82
  return []
 
89
  error_message=request.error_message,
90
  language=request.language,
91
  framework=request.framework,
92
+ code=request.code,
93
  max_results=settings.MAX_STACK_RESULTS,
94
  )
95
  collected.extend(stack_results)
96
 
97
+ if should_use_github(request):
98
  github_results = search_github(
99
  message=request.message,
100
  error_message=request.error_message,
 
106
 
107
  unique_items = deduplicate_evidence(collected)
108
  ranked_items = sort_evidence(unique_items)
109
+ final_items = trim_evidence(ranked_items)
110
 
111
+ return final_items