from typing import List

from github_search import search_github
from stack_search import search_stackoverflow
from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence, SourceType
from config import settings


def normalize_text(text: str) -> str:
    """Return *text* stripped and lower-cased; a falsy value yields ``""``."""
    return (text or "").strip().lower()


def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Drop repeated evidence items, keeping the first occurrence of each.

    Two items are duplicates when their normalized title, normalized URL
    (a missing URL counts as an empty string) and source type value all
    match. The relative order of the surviving items is preserved.
    """
    seen = set()
    unique_items: List[RetrievedEvidence] = []

    for item in evidence_list:
        key = (
            normalize_text(item.title),
            normalize_text(item.url or ""),
            item.source_type.value,
        )
        if key in seen:
            continue
        seen.add(key)
        unique_items.append(item)

    return unique_items


def source_priority(source_type: SourceType) -> int:
    """Return the ranking bonus for a source: Stack Overflow 2, GitHub 1, else 0."""
    if source_type == SourceType.STACKOVERFLOW:
        return 2
    if source_type == SourceType.GITHUB:
        return 1
    return 0


def score_evidence(item: RetrievedEvidence) -> float:
    """Return the ranking score: the item's own score plus its source bonus.

    An item whose ``score`` is ``None`` contributes a base score of 0.0.
    """
    base = item.score if item.score is not None else 0.0
    return base + source_priority(item.source_type)


def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Return a new list ordered by ``score_evidence``, highest first.

    ``sorted`` is stable, so items with equal scores keep their input order.
    """
    return sorted(evidence_list, key=score_evidence, reverse=True)


def trim_evidence(
    evidence_list: List[RetrievedEvidence],
    *,
    max_stack_items: int = 3,
    max_github_items: int = 2,
) -> List[RetrievedEvidence]:
    """Keep the best-ranked items per source, then cap the overall total.

    Items whose source type is neither Stack Overflow nor GitHub are dropped.

    Args:
        evidence_list: Candidate items, in any order.
        max_stack_items: Maximum number of Stack Overflow items to keep.
        max_github_items: Maximum number of GitHub items to keep.

    Returns:
        The kept items ordered by ``score_evidence`` (highest first),
        truncated to ``settings.MAX_RETRIEVED_ITEMS`` entries.
    """
    # Rank first so the per-source caps keep the highest-scored items even
    # when the caller did not pre-sort. For input that is already ranked this
    # is a no-op because the sort is stable.
    ranked = sort_evidence(evidence_list)

    stack_items = [
        item for item in ranked if item.source_type == SourceType.STACKOVERFLOW
    ][:max_stack_items]
    github_items = [
        item for item in ranked if item.source_type == SourceType.GITHUB
    ][:max_github_items]

    # Re-rank to interleave the two sources by score.
    combined = sort_evidence(stack_items + github_items)
    return combined[: settings.MAX_RETRIEVED_ITEMS]


def should_use_github(request: CodeXRequest) -> bool:
    """Decide whether GitHub should be searched for this request.

    Returns ``False`` when ``settings.ENABLE_GITHUB_SEARCH`` is falsy.
    Otherwise returns ``True`` only if the request carries a non-blank
    ``framework`` or a non-blank ``error_message``.
    """
    if not settings.ENABLE_GITHUB_SEARCH:
        return False
    return any(
        text and text.strip()
        for text in (request.framework, request.error_message)
    )


def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
    """Collect, de-duplicate, rank and trim external evidence for a request.

    Evidence is only gathered for ``CodeTaskType.FIX``; any other task type
    returns an empty list without performing a search.

    Args:
        task_type: The kind of task being handled.
        request: The request whose fields are forwarded to the search helpers.

    Returns:
        The final evidence list, as produced by ``trim_evidence``.
    """
    if task_type != CodeTaskType.FIX:
        return []

    collected: List[RetrievedEvidence] = []

    if settings.ENABLE_STACK_SEARCH:
        stack_results = search_stackoverflow(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            code=request.code,
            max_results=settings.MAX_STACK_RESULTS,
        )
        collected.extend(stack_results)

    if should_use_github(request):
        github_results = search_github(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            max_results=settings.MAX_GITHUB_RESULTS,
        )
        collected.extend(github_results)

    unique_items = deduplicate_evidence(collected)
    ranked_items = sort_evidence(unique_items)
    final_items = trim_evidence(ranked_items)
    return final_items