from typing import List
from github_search import search_github
from stack_search import search_stackoverflow
from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence, SourceType
from config import settings
def normalize_text(text: str) -> str:
    """Return ``text`` without surrounding whitespace and in lower case.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if not text:
        return ""
    return text.strip().lower()
def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Drop repeated evidence entries, keeping the first occurrence of each.

    Two entries are duplicates when their normalized title, their normalized
    URL (a missing URL is treated as an empty string) and their source type
    value all match. Surviving entries keep their original relative order.
    """
    first_by_fingerprint = {}
    for entry in evidence_list:
        fingerprint = (
            normalize_text(entry.title),
            normalize_text(entry.url or ""),
            entry.source_type.value,
        )
        # setdefault stores the entry only for a fingerprint not seen before,
        # and dicts preserve insertion order, so first occurrences win.
        first_by_fingerprint.setdefault(fingerprint, entry)
    return list(first_by_fingerprint.values())
def source_priority(source_type: SourceType) -> int:
    """Return the ranking bonus for a source type.

    Stack Overflow gets 2, GitHub gets 1, and every other source gets 0.
    """
    bonuses = (
        (SourceType.STACKOVERFLOW, 2),
        (SourceType.GITHUB, 1),
    )
    # Compare with == (not a dict lookup) so equality semantics stay the same.
    for known_source, bonus in bonuses:
        if source_type == known_source:
            return bonus
    return 0
def score_evidence(item: RetrievedEvidence) -> float:
    """Return the ranking score for ``item``.

    The result is the item's own score (0.0 when the score is ``None``) plus
    the priority bonus of its source type.
    """
    own_score = item.score
    if own_score is None:
        own_score = 0.0
    return own_score + source_priority(item.source_type)
def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Return a new list of the evidence ordered from highest to lowest score.

    Scores come from ``score_evidence``. The input list is not modified, and
    items with equal scores keep their original relative order.
    """
    ranked = list(evidence_list)
    ranked.sort(key=score_evidence, reverse=True)
    return ranked
def trim_evidence(
    evidence_list: List[RetrievedEvidence],
    per_source_limit: int = 3,
) -> List[RetrievedEvidence]:
    """Cap the evidence per source and overall, returning it in ranked order.

    The evidence is ranked first, so the per-source cap always keeps the
    highest-scored Stack Overflow and GitHub items even when the caller
    passes an unsorted list. For input that is already ranked the result is
    the same as capping in input order.

    Args:
        evidence_list: Evidence items to trim. The list is not modified.
        per_source_limit: Maximum number of Stack Overflow items and maximum
            number of GitHub items to keep. Items from other sources are not
            capped per source. Defaults to 3.

    Returns:
        At most ``settings.MAX_RETRIEVED_ITEMS`` items, highest score first.

    Raises:
        ValueError: If ``per_source_limit`` is negative.
    """
    if per_source_limit < 0:
        raise ValueError("per_source_limit must be non-negative")

    # Rank before capping so the slices below keep the best items per source.
    ranked = sort_evidence(evidence_list)

    stack_items = [
        item for item in ranked
        if item.source_type == SourceType.STACKOVERFLOW
    ][:per_source_limit]
    github_items = [
        item for item in ranked
        if item.source_type == SourceType.GITHUB
    ][:per_source_limit]
    other_items = [
        item for item in ranked
        if item.source_type not in {SourceType.STACKOVERFLOW, SourceType.GITHUB}
    ]

    # Re-rank the regrouped items so the sources are interleaved by score.
    combined = sort_evidence(stack_items + github_items + other_items)
    return combined[: settings.MAX_RETRIEVED_ITEMS]
def should_use_stack(task_type: CodeTaskType) -> bool:
    """Tell whether Stack Overflow should be searched for ``task_type``.

    Returns ``False`` whenever ``settings.ENABLE_STACK_SEARCH`` is falsy;
    otherwise returns ``True`` only for FIX, REVIEW and REFACTOR tasks.
    """
    if settings.ENABLE_STACK_SEARCH:
        stack_tasks = {
            CodeTaskType.FIX,
            CodeTaskType.REVIEW,
            CodeTaskType.REFACTOR,
        }
        return task_type in stack_tasks
    return False
def should_use_github(task_type: CodeTaskType, request: CodeXRequest) -> bool:
    """Tell whether GitHub should be searched for this task and request.

    Returns ``False`` when ``settings.ENABLE_GITHUB_SEARCH`` is falsy or the
    task type is not FIX, REVIEW or REFACTOR. Otherwise at least one relevant
    request field must hold non-blank text:

    * FIX: ``framework``, ``error_message`` or ``language``
    * REVIEW / REFACTOR: ``framework`` or ``language``
    """
    if not settings.ENABLE_GITHUB_SEARCH:
        return False

    if task_type == CodeTaskType.FIX:
        relevant_fields = ("framework", "error_message", "language")
    elif task_type in {CodeTaskType.REVIEW, CodeTaskType.REFACTOR}:
        relevant_fields = ("framework", "language")
    else:
        return False

    # Read the fields lazily and in order, stopping at the first usable one.
    for field_name in relevant_fields:
        value = getattr(request, field_name)
        if value and value.strip():
            return True
    return False
def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
    """Gather, deduplicate, rank and trim external evidence for a code task.

    Task types other than FIX, REVIEW and REFACTOR get an empty list. For the
    supported ones, Stack Overflow and GitHub are each queried only when
    ``should_use_stack`` / ``should_use_github`` allow it. Stack Overflow
    results are collected before GitHub results.

    Args:
        task_type: The kind of code task being handled.
        request: The request whose message, error message, language,
            framework and code are forwarded to the search helpers.

    Returns:
        The evidence after ``deduplicate_evidence``, ``sort_evidence`` and
        ``trim_evidence`` have been applied in that order.
    """
    if task_type not in {
        CodeTaskType.FIX,
        CodeTaskType.REVIEW,
        CodeTaskType.REFACTOR,
    }:
        return []

    gathered = []

    if should_use_stack(task_type):
        # The Stack Overflow search also receives the request's code.
        gathered.extend(
            search_stackoverflow(
                message=request.message,
                error_message=request.error_message,
                language=request.language,
                framework=request.framework,
                code=request.code,
                max_results=settings.MAX_STACK_RESULTS,
            )
        )

    if should_use_github(task_type, request):
        gathered.extend(
            search_github(
                message=request.message,
                error_message=request.error_message,
                language=request.language,
                framework=request.framework,
                max_results=settings.MAX_GITHUB_RESULTS,
            )
        )

    return trim_evidence(sort_evidence(deduplicate_evidence(gathered)))