| from typing import List |
|
|
| from github_search import search_github |
| from stack_search import search_stackoverflow |
| from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence, SourceType |
| from config import settings |
|
|
|
|
def normalize_text(text: str) -> str:
    """Return *text* stripped of surrounding whitespace and lower-cased.

    Falsy input (for example ``None`` or an empty string) yields ``""``.
    """
    if not text:
        return ""
    return text.strip().lower()
|
|
|
|
def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Drop repeated evidence items, keeping the first occurrence of each.

    Two items are treated as duplicates when their normalized title,
    normalized URL and source type value all match. The relative order of
    the surviving items is the same as in ``evidence_list``.
    """
    fingerprints = set()
    kept: List[RetrievedEvidence] = []

    for entry in evidence_list:
        fingerprint = (
            normalize_text(entry.title),
            normalize_text(entry.url or ""),
            entry.source_type.value,
        )
        if fingerprint not in fingerprints:
            fingerprints.add(fingerprint)
            kept.append(entry)

    return kept
|
|
|
|
def source_priority(source_type: SourceType) -> int:
    """Return the ranking weight of a source type.

    ``SourceType.STACKOVERFLOW`` weighs 2, ``SourceType.GITHUB`` weighs 1,
    and every other value weighs 0.
    """
    # Checked in order with ``==`` so the comparison semantics stay those of
    # the source type itself rather than of a hash lookup.
    weights = (
        (SourceType.STACKOVERFLOW, 2),
        (SourceType.GITHUB, 1),
    )
    for candidate, weight in weights:
        if source_type == candidate:
            return weight
    return 0
|
|
|
|
def score_evidence(item: RetrievedEvidence) -> float:
    """Compute an item's ranking score: its own score plus its source priority.

    A missing (``None``) score counts as ``0.0``.
    """
    raw_score = 0.0 if item.score is None else item.score
    return raw_score + source_priority(item.source_type)
|
|
|
|
def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Return a new list of evidence ordered from highest to lowest score.

    Items are ranked with ``score_evidence``; the input list is left
    unmodified.
    """
    ranked = list(evidence_list)
    ranked.sort(key=score_evidence, reverse=True)
    return ranked
|
|
|
|
def trim_evidence(
    evidence_list: List[RetrievedEvidence],
    *,
    max_stack_items: int = 3,
    max_github_items: int = 2,
) -> List[RetrievedEvidence]:
    """Cap the evidence per source, re-rank it, and apply the global limit.

    The first ``max_stack_items`` Stack Overflow items and the first
    ``max_github_items`` GitHub items are taken in input order, so callers
    should pass an already ranked list. Items of any other source type are
    dropped.

    Args:
        evidence_list: Candidate evidence, expected best-first.
        max_stack_items: Maximum number of Stack Overflow items to keep.
        max_github_items: Maximum number of GitHub items to keep.

    Returns:
        The kept items sorted with ``sort_evidence`` and truncated to
        ``settings.MAX_RETRIEVED_ITEMS`` entries.
    """
    stack_items = [
        item for item in evidence_list if item.source_type == SourceType.STACKOVERFLOW
    ][:max_stack_items]
    github_items = [
        item for item in evidence_list if item.source_type == SourceType.GITHUB
    ][:max_github_items]

    # Concatenation groups items by source, so re-rank to interleave them by score.
    combined = sort_evidence(stack_items + github_items)

    return combined[: settings.MAX_RETRIEVED_ITEMS]
|
|
|
|
def should_use_github(request: CodeXRequest) -> bool:
    """Decide whether GitHub should be searched for this request.

    Returns ``False`` when ``settings.ENABLE_GITHUB_SEARCH`` is off. Otherwise
    returns ``True`` when the request carries a non-blank framework or a
    non-blank error message, and ``False`` when it carries neither.
    """
    if not settings.ENABLE_GITHUB_SEARCH:
        return False

    def has_text(value) -> bool:
        # Truthy only for values that still contain something after stripping.
        return bool(value and value.strip())

    return has_text(request.framework) or has_text(request.error_message)
|
|
|
|
def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
    """Gather, de-duplicate, rank and trim external evidence for a request.

    Evidence is only retrieved for ``CodeTaskType.FIX``; any other task type
    yields an empty list. Stack Overflow is queried when
    ``settings.ENABLE_STACK_SEARCH`` is set, and GitHub is queried when
    ``should_use_github(request)`` is true.
    """
    if task_type != CodeTaskType.FIX:
        return []

    gathered: List[RetrievedEvidence] = []

    if settings.ENABLE_STACK_SEARCH:
        gathered += search_stackoverflow(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            code=request.code,
            max_results=settings.MAX_STACK_RESULTS,
        )

    if should_use_github(request):
        gathered += search_github(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            max_results=settings.MAX_GITHUB_RESULTS,
        )

    # Pipeline: remove duplicates, rank by score, then apply per-source and global caps.
    return trim_evidence(sort_evidence(deduplicate_evidence(gathered)))