Chat7-CodeX-Backend / code_retriever.py
hashan-7's picture
Update code
4027d07 verified
from typing import List
from github_search import search_github
from stack_search import search_stackoverflow
from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence, SourceType
from config import settings
def normalize_text(text: str) -> str:
    """Return *text* trimmed of surrounding whitespace and lowercased.

    Any falsy input (for example ``None`` or an empty string) yields ``""``.
    """
    if not text:
        return ""
    trimmed = text.strip()
    return trimmed.lower()
def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Drop repeated evidence entries, keeping the first occurrence of each.

    Two entries are treated as duplicates when their normalized title,
    normalized URL (a missing URL counts as an empty string) and
    ``source_type.value`` all match. The relative order of the surviving
    entries is the order in which they first appeared in *evidence_list*.
    """
    # Dicts preserve insertion order, so setdefault() both records the first
    # entry seen for each identity and keeps the original ordering.
    first_by_identity = {}
    for entry in evidence_list:
        identity = (
            normalize_text(entry.title),
            normalize_text(entry.url or ""),
            entry.source_type.value,
        )
        first_by_identity.setdefault(identity, entry)
    return list(first_by_identity.values())
def source_priority(source_type: SourceType) -> int:
    """Return the ranking bonus associated with *source_type*.

    Stack Overflow maps to 2, GitHub maps to 1, and every other source
    type maps to 0.
    """
    # Checked in order; the first matching source wins.
    bonus_table = (
        (SourceType.STACKOVERFLOW, 2),
        (SourceType.GITHUB, 1),
    )
    for known_source, bonus in bonus_table:
        if source_type == known_source:
            return bonus
    return 0
def score_evidence(item: RetrievedEvidence) -> float:
    """Return the ranking score for *item*.

    The result is the item's own ``score`` (treated as ``0.0`` when it is
    ``None``) plus the priority bonus of its source type.
    """
    own_score = item.score
    if own_score is None:
        own_score = 0.0
    return own_score + source_priority(item.source_type)
def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Return a new list of the evidence ordered from highest to lowest score.

    Scores come from ``score_evidence``. The input list is not modified;
    entries with equal scores keep their original relative order because
    the sort is stable.
    """
    ranked = list(evidence_list)
    ranked.sort(key=score_evidence, reverse=True)
    return ranked
def trim_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Cap the evidence per source and overall, returning it sorted by score.

    At most the first three Stack Overflow entries and the first three
    GitHub entries (in the order they appear in *evidence_list*) are kept;
    entries from any other source are all kept. The survivors are re-sorted
    with ``sort_evidence`` and cut to ``settings.MAX_RETRIEVED_ITEMS``.
    """
    per_source_cap = 3

    from_stack: List[RetrievedEvidence] = []
    from_github: List[RetrievedEvidence] = []
    from_elsewhere: List[RetrievedEvidence] = []

    # Bucket every entry by its source in a single pass.
    for entry in evidence_list:
        if entry.source_type == SourceType.STACKOVERFLOW:
            from_stack.append(entry)
        elif entry.source_type == SourceType.GITHUB:
            from_github.append(entry)
        else:
            from_elsewhere.append(entry)

    survivors = from_stack[:per_source_cap]
    survivors += from_github[:per_source_cap]
    survivors += from_elsewhere

    ranked = sort_evidence(survivors)
    return ranked[: settings.MAX_RETRIEVED_ITEMS]
def should_use_stack(task_type: CodeTaskType) -> bool:
    """Tell whether Stack Overflow should be searched for *task_type*.

    Returns ``False`` whenever ``settings.ENABLE_STACK_SEARCH`` is falsy;
    otherwise returns ``True`` only for FIX, REVIEW and REFACTOR tasks.
    """
    # Short-circuits: the task-type check only runs when the feature is on.
    return bool(settings.ENABLE_STACK_SEARCH) and task_type in {
        CodeTaskType.FIX,
        CodeTaskType.REVIEW,
        CodeTaskType.REFACTOR,
    }
def should_use_github(task_type: CodeTaskType, request: CodeXRequest) -> bool:
    """Tell whether GitHub should be searched for this task and request.

    Returns ``False`` when ``settings.ENABLE_GITHUB_SEARCH`` is falsy or the
    task type is not FIX, REVIEW or REFACTOR. Otherwise returns ``True``
    only if at least one relevant request field contains non-whitespace
    text: framework, error message or language for FIX; framework or
    language for REVIEW and REFACTOR.
    """
    if not settings.ENABLE_GITHUB_SEARCH:
        return False

    # Pick the request fields that count as usable search context.
    if task_type == CodeTaskType.FIX:
        context_fields = (
            request.framework,
            request.error_message,
            request.language,
        )
    elif task_type in {CodeTaskType.REVIEW, CodeTaskType.REFACTOR}:
        context_fields = (
            request.framework,
            request.language,
        )
    else:
        return False

    return any(value and value.strip() for value in context_fields)
def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
    """Gather, de-duplicate, rank and trim external evidence for a request.

    Only FIX, REVIEW and REFACTOR tasks are handled; any other task type
    yields an empty list. Stack Overflow is queried when
    ``should_use_stack`` allows it and GitHub when ``should_use_github``
    allows it. The combined results are passed through
    ``deduplicate_evidence``, ``sort_evidence`` and ``trim_evidence`` in
    that order.
    """
    if task_type not in {
        CodeTaskType.FIX,
        CodeTaskType.REVIEW,
        CodeTaskType.REFACTOR,
    }:
        return []

    gathered: List[RetrievedEvidence] = []

    # Stack Overflow results are collected first, then GitHub results.
    if should_use_stack(task_type):
        gathered += search_stackoverflow(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            code=request.code,
            max_results=settings.MAX_STACK_RESULTS,
        )

    if should_use_github(task_type, request):
        gathered += search_github(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            max_results=settings.MAX_GITHUB_RESULTS,
        )

    return trim_evidence(sort_evidence(deduplicate_evidence(gathered)))