Chat7-CodeX-Backend / code_retriever.py
hashan-7's picture
update code
1ea7c95 verified
raw
history blame
3.21 kB
from typing import List
from github_search import search_github
from stack_search import search_stackoverflow
from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence, SourceType
from config import settings
def normalize_text(text: str) -> str:
    """Return *text* trimmed of surrounding whitespace and lower-cased.

    Falsy input (for example ``None`` or an empty string) yields ``""``.
    """
    if not text:
        return ""
    return text.strip().lower()
def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Drop repeated evidence entries, keeping the first occurrence of each.

    Two entries count as duplicates when their normalized title, their
    normalized URL (a missing URL is treated as an empty string) and their
    source type value all match. Surviving entries keep their relative order.
    """
    first_seen = {}
    for entry in evidence_list:
        identity = (
            normalize_text(entry.title),
            normalize_text(entry.url or ""),
            entry.source_type.value,
        )
        # setdefault stores the entry only when this identity is new, so
        # later duplicates are ignored; dicts preserve insertion order.
        first_seen.setdefault(identity, entry)
    return list(first_seen.values())
def source_priority(source_type: SourceType) -> int:
    """Return the ranking bonus for *source_type*.

    ``SourceType.STACKOVERFLOW`` maps to 2, ``SourceType.GITHUB`` maps to 1,
    and every other value maps to 0.
    """
    weighted_sources = (
        (SourceType.STACKOVERFLOW, 2),
        (SourceType.GITHUB, 1),
    )
    for candidate, weight in weighted_sources:
        if source_type == candidate:
            return weight
    return 0
def score_evidence(item: RetrievedEvidence) -> float:
    """Return the ranking score of *item*.

    The result is the item's own ``score`` (a missing score counts as 0.0)
    plus the bonus that ``source_priority`` assigns to its source type.
    """
    raw_score = item.score
    if raw_score is None:
        raw_score = 0.0
    bonus = source_priority(item.source_type)
    return raw_score + bonus
def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Return a new list of the evidence ordered from highest to lowest score.

    Scores come from ``score_evidence``. The input list is not modified, and
    entries with equal scores keep their original relative order.
    """
    ranked = list(evidence_list)
    ranked.sort(key=score_evidence, reverse=True)
    return ranked
def trim_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Cap the evidence per source and overall, keeping the best-scored items.

    At most three ``SourceType.STACKOVERFLOW`` items and two
    ``SourceType.GITHUB`` items are kept; items of any other source type are
    dropped. The survivors are ordered by ``sort_evidence`` and truncated to
    ``settings.MAX_RETRIEVED_ITEMS``.

    The input is ranked before the per-source caps are applied, so the caps
    keep the highest-scoring items even when the caller passes an unsorted
    list. The sort is stable, so input that is already sorted yields the
    same result as slicing it directly.
    """
    ranked = sort_evidence(evidence_list)

    stack_items = [
        item for item in ranked if item.source_type == SourceType.STACKOVERFLOW
    ][:3]
    github_items = [
        item for item in ranked if item.source_type == SourceType.GITHUB
    ][:2]

    # Each group is ordered on its own, but their concatenation is not,
    # so merge them with one more stable sort.
    combined = sort_evidence(stack_items + github_items)
    return combined[: settings.MAX_RETRIEVED_ITEMS]
def should_use_github(request: CodeXRequest) -> bool:
    """Decide whether ``search_github`` should be called for *request*.

    Returns ``False`` when ``settings.ENABLE_GITHUB_SEARCH`` is falsy.
    Otherwise returns ``True`` if the request has a non-blank ``framework``
    or a non-blank ``error_message``, and ``False`` if both are missing or
    whitespace-only.
    """
    if not settings.ENABLE_GITHUB_SEARCH:
        return False

    # Fields are read lazily and in this order: framework first, then
    # error_message only when framework is missing or blank.
    for field_name in ("framework", "error_message"):
        value = getattr(request, field_name)
        if value and value.strip():
            return True
    return False
def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
    """Collect, de-duplicate, rank and trim external evidence for a request.

    Evidence is gathered only when *task_type* is ``CodeTaskType.FIX``; any
    other task type returns an empty list. ``search_stackoverflow`` is called
    when ``settings.ENABLE_STACK_SEARCH`` is truthy, and ``search_github`` is
    called when ``should_use_github(request)`` is true. The combined results
    then pass through ``deduplicate_evidence``, ``sort_evidence`` and
    ``trim_evidence`` in that order.
    """
    if task_type != CodeTaskType.FIX:
        return []

    gathered: List[RetrievedEvidence] = []

    if settings.ENABLE_STACK_SEARCH:
        gathered += search_stackoverflow(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            code=request.code,
            max_results=settings.MAX_STACK_RESULTS,
        )

    if should_use_github(request):
        gathered += search_github(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            max_results=settings.MAX_GITHUB_RESULTS,
        )

    return trim_evidence(sort_evidence(deduplicate_evidence(gathered)))