| from typing import List |
|
|
| from github_search import search_github |
| from stack_search import search_stackoverflow |
| from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence, SourceType |
| from config import settings |
|
|
|
|
def normalize_text(text: str) -> str:
    """Return *text* stripped of surrounding whitespace and lower-cased.

    Any falsy input (``None``, ``""``) yields the empty string.
    """
    if not text:
        return ""
    trimmed = text.strip()
    return trimmed.lower()
|
|
|
|
def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Drop repeated evidence items, keeping the first occurrence of each.

    Two items count as duplicates when their normalized title, normalized
    URL (a missing URL is treated as an empty string) and source type value
    are all equal.  The relative order of the surviving items is preserved.
    """
    # dicts keep insertion order, and setdefault never overwrites an existing
    # entry, so the first item seen for each signature is the one retained.
    first_by_signature = {}

    for candidate in evidence_list:
        title_key = normalize_text(candidate.title)
        url_key = normalize_text(candidate.url or "")
        signature = (title_key, url_key, candidate.source_type.value)
        first_by_signature.setdefault(signature, candidate)

    return list(first_by_signature.values())
|
|
|
|
def source_priority(source_type: SourceType) -> int:
    """Return the ranking bonus for a source type.

    Stack Overflow gets 2, GitHub gets 1, and every other source gets 0.
    """
    # Checked in order with ``==``, highest bonus first.
    bonuses = (
        (SourceType.STACKOVERFLOW, 2),
        (SourceType.GITHUB, 1),
    )
    for known_source, bonus in bonuses:
        if source_type == known_source:
            return bonus
    return 0
|
|
|
|
def score_evidence(item: RetrievedEvidence) -> float:
    """Return the ranking score for *item*.

    The score is the item's own ``score`` (0.0 when it is ``None``) plus the
    bonus that ``source_priority`` assigns to its source type.
    """
    own_score = item.score
    if own_score is None:
        own_score = 0.0
    return own_score + source_priority(item.source_type)
|
|
|
|
def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Return a new list of the evidence ordered by descending score.

    Items are ranked with ``score_evidence``; the input is left unmodified,
    and items with equal scores keep their original relative order.
    """
    ranked = list(evidence_list)
    ranked.sort(key=score_evidence, reverse=True)
    return ranked
|
|
|
|
def trim_evidence(
    evidence_list: List[RetrievedEvidence],
    *,
    per_source_limit: int = 3,
) -> List[RetrievedEvidence]:
    """Cap the evidence per source and overall, keeping the best-scored items.

    The list is ranked with ``sort_evidence`` *before* the per-source cap is
    applied, so the Stack Overflow and GitHub items that survive are the
    highest-scored ones even when the caller passes an unsorted list.  Items
    from any other source are not capped per source.

    Args:
        evidence_list: Evidence items to trim; the list is not modified.
        per_source_limit: Maximum number of Stack Overflow items and of
            GitHub items to keep (each source is capped separately).
            Negative values are treated as zero.

    Returns:
        A new list ordered by descending ``score_evidence``, sliced to the
        first ``settings.MAX_RETRIEVED_ITEMS`` entries.
    """
    limit = max(per_source_limit, 0)

    # Rank first so the slices below keep the top items of each source rather
    # than whichever ones happened to come first in the input.
    ranked = sort_evidence(evidence_list)

    stack_items: List[RetrievedEvidence] = []
    github_items: List[RetrievedEvidence] = []
    other_items: List[RetrievedEvidence] = []
    for item in ranked:
        if item.source_type == SourceType.STACKOVERFLOW:
            stack_items.append(item)
        elif item.source_type == SourceType.GITHUB:
            github_items.append(item)
        else:
            other_items.append(item)

    combined = stack_items[:limit] + github_items[:limit] + other_items

    # Re-sort to interleave the three groups by score; the sort is stable, so
    # ties resolve in Stack Overflow, GitHub, other order.
    return sort_evidence(combined)[: settings.MAX_RETRIEVED_ITEMS]
|
|
|
|
def should_use_stack(task_type: CodeTaskType) -> bool:
    """Decide whether the Stack Overflow search applies to *task_type*.

    Returns ``False`` whenever ``settings.ENABLE_STACK_SEARCH`` is falsy;
    otherwise returns whether the task type is FIX, REVIEW or REFACTOR.
    """
    stack_task_types = {
        CodeTaskType.FIX,
        CodeTaskType.REVIEW,
        CodeTaskType.REFACTOR,
    }
    return bool(settings.ENABLE_STACK_SEARCH) and task_type in stack_task_types
|
|
|
|
def should_use_github(task_type: CodeTaskType, request: CodeXRequest) -> bool:
    """Decide whether the GitHub search applies to this task and request.

    Returns ``False`` whenever ``settings.ENABLE_GITHUB_SEARCH`` is falsy.
    Otherwise:

    * FIX tasks need a non-blank framework, error message or language.
    * REVIEW and REFACTOR tasks need a non-blank framework or language.
    * Every other task type yields ``False``.
    """

    def has_text(value) -> bool:
        # True only for a truthy value that is still truthy after stripping.
        return bool(value and value.strip())

    if not settings.ENABLE_GITHUB_SEARCH:
        return False

    if task_type == CodeTaskType.FIX:
        return (
            has_text(request.framework)
            or has_text(request.error_message)
            or has_text(request.language)
        )

    if task_type in {CodeTaskType.REVIEW, CodeTaskType.REFACTOR}:
        return has_text(request.framework) or has_text(request.language)

    return False
|
|
|
|
def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
    """Collect, de-duplicate, rank and trim external evidence for a request.

    Only FIX, REVIEW and REFACTOR tasks are supported; any other task type
    returns an empty list without searching.  Stack Overflow is queried when
    ``should_use_stack`` allows it and GitHub when ``should_use_github``
    allows it.  The merged results then pass through
    ``deduplicate_evidence``, ``sort_evidence`` and ``trim_evidence``.
    """
    if task_type not in {CodeTaskType.FIX, CodeTaskType.REVIEW, CodeTaskType.REFACTOR}:
        return []

    gathered: List[RetrievedEvidence] = []

    if should_use_stack(task_type):
        gathered.extend(
            search_stackoverflow(
                message=request.message,
                error_message=request.error_message,
                language=request.language,
                framework=request.framework,
                code=request.code,
                max_results=settings.MAX_STACK_RESULTS,
            )
        )

    if should_use_github(task_type, request):
        gathered.extend(
            search_github(
                message=request.message,
                error_message=request.error_message,
                language=request.language,
                framework=request.framework,
                max_results=settings.MAX_GITHUB_RESULTS,
            )
        )

    # Pipeline: remove duplicates, rank by score, then apply the caps.
    return trim_evidence(sort_evidence(deduplicate_evidence(gathered)))