hashan-7 committed on
Commit
049f60c
·
verified ·
1 Parent(s): e21f24d

add the code

Browse files
Files changed (1) hide show
  1. code_retriever.py +70 -0
code_retriever.py CHANGED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from github_search import search_github
4
+ from stack_search import search_stackoverflow
5
+ from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence
6
+ from config import settings
7
+
8
+
9
def normalize_text(text: str) -> str:
    """Return *text* stripped of surrounding whitespace and lowercased.

    Falsy input (``None`` or an empty string) yields ``""``.
    """
    if not text:
        return ""
    return text.strip().lower()
11
+
12
+
13
def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Drop repeated evidence entries, keeping the first occurrence of each.

    Two entries are treated as duplicates when their normalized title,
    normalized URL and ``source_type.value`` all match. The relative order
    of the surviving entries follows the input order.
    """
    first_by_fingerprint = {}

    for entry in evidence_list:
        fingerprint = (
            normalize_text(entry.title),
            normalize_text(entry.url or ""),
            entry.source_type.value,
        )
        # setdefault only stores the entry the first time a fingerprint is
        # seen; dicts keep insertion order, so input order is preserved.
        first_by_fingerprint.setdefault(fingerprint, entry)

    return list(first_by_fingerprint.values())
31
+
32
+
33
def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Return a new list ordered by descending ``score``.

    Entries whose ``score`` is ``None`` are placed after every scored entry.
    The input list is not modified.
    """

    def _rank(entry):
        # First element separates scored (True) from unscored (False)
        # entries; -1 is only a placeholder for the unscored group.
        score = entry.score
        if score is None:
            return (False, -1)
        return (True, score)

    ordered = list(evidence_list)
    ordered.sort(key=_rank, reverse=True)
    return ordered
39
+
40
+
41
def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
    """Collect, de-duplicate and rank external evidence for a FIX task.

    Any task type other than ``CodeTaskType.FIX`` yields an empty list.
    Otherwise ``search_stackoverflow`` and ``search_github`` are called, each
    only when its ``settings.ENABLE_*`` flag is truthy. The combined results
    are de-duplicated, sorted, and truncated to
    ``settings.MAX_RETRIEVED_ITEMS`` entries.
    """
    if task_type != CodeTaskType.FIX:
        return []

    # Both search backends receive the same query fields from the request.
    query = dict(
        message=request.message,
        error_message=request.error_message,
        language=request.language,
        framework=request.framework,
    )
    gathered: List[RetrievedEvidence] = []

    if settings.ENABLE_STACK_SEARCH:
        gathered.extend(
            search_stackoverflow(max_results=settings.MAX_STACK_RESULTS, **query)
        )

    if settings.ENABLE_GITHUB_SEARCH:
        gathered.extend(
            search_github(max_results=settings.MAX_GITHUB_RESULTS, **query)
        )

    ranked = sort_evidence(deduplicate_evidence(gathered))
    return ranked[: settings.MAX_RETRIEVED_ITEMS]