File size: 3,211 Bytes
049f60c
 
 
 
1ea7c95
049f60c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ea7c95
 
 
 
 
 
 
 
 
 
 
 
 
049f60c
 
 
1ea7c95
049f60c
 
 
 
1ea7c95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
049f60c
 
 
 
 
 
 
 
 
 
 
 
1ea7c95
049f60c
 
 
 
1ea7c95
049f60c
 
 
 
 
 
 
 
 
 
 
1ea7c95
049f60c
1ea7c95
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from typing import List

from github_search import search_github
from stack_search import search_stackoverflow
from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence, SourceType
from config import settings


def normalize_text(text: str) -> str:
    """Return *text* stripped of surrounding whitespace and lowercased.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if not text:
        return ""
    return text.strip().lower()


def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Drop repeated evidence items, keeping the first occurrence of each.

    Two items are duplicates when their normalized title, normalized URL
    (a missing URL is treated as an empty string) and source type value
    all match. Surviving items keep their original relative order.
    """
    first_by_fingerprint = {}

    for entry in evidence_list:
        fingerprint = (
            normalize_text(entry.title),
            normalize_text(entry.url or ""),
            entry.source_type.value,
        )
        # setdefault only stores the first entry seen for a fingerprint;
        # dicts preserve insertion order, so output order matches input.
        first_by_fingerprint.setdefault(fingerprint, entry)

    return list(first_by_fingerprint.values())


def source_priority(source_type: SourceType) -> int:
    """Return the ranking bonus for a source type.

    Stack Overflow gets 2, GitHub gets 1, and any other source gets 0.
    """
    bonuses = (
        (SourceType.STACKOVERFLOW, 2),
        (SourceType.GITHUB, 1),
    )
    for known_type, bonus in bonuses:
        if source_type == known_type:
            return bonus
    return 0


def score_evidence(item: RetrievedEvidence) -> float:
    """Return the ranking score of *item*.

    The result is the item's own score (0.0 when the score is ``None``)
    plus the bonus that ``source_priority`` assigns to its source type.
    """
    own_score = item.score
    if own_score is None:
        own_score = 0.0
    return own_score + source_priority(item.source_type)


def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Return a new list of evidence ordered from highest to lowest score.

    Scores come from ``score_evidence``. The input list is not modified,
    and items with equal scores keep their input order.
    """
    ranked = list(evidence_list)
    ranked.sort(key=score_evidence, reverse=True)
    return ranked


def trim_evidence(
    evidence_list: List[RetrievedEvidence],
    *,
    max_stack_items: int = 3,
    max_github_items: int = 2,
) -> List[RetrievedEvidence]:
    """Cap the evidence per source and overall, returning it ranked.

    Only Stack Overflow and GitHub items are kept; items from any other
    source type are dropped.

    Args:
        evidence_list: Candidate evidence items, in any order.
        max_stack_items: Maximum number of Stack Overflow items to keep.
            Negative values are treated as 0.
        max_github_items: Maximum number of GitHub items to keep.
            Negative values are treated as 0.

    Returns:
        The kept items ordered by ``sort_evidence``, truncated to
        ``settings.MAX_RETRIEVED_ITEMS`` entries.
    """
    # Rank before applying the per-source caps so the caps keep the
    # best-scored items even when the caller passes an unsorted list.
    # For an already-ranked list this is a no-op because the sort is stable.
    ranked = sort_evidence(evidence_list)

    stack_items = [item for item in ranked if item.source_type == SourceType.STACKOVERFLOW]
    github_items = [item for item in ranked if item.source_type == SourceType.GITHUB]

    # Clamp at 0: a negative slice bound would mean "all but the last N".
    stack_items = stack_items[: max(max_stack_items, 0)]
    github_items = github_items[: max(max_github_items, 0)]

    # Re-rank so items from the two sources are interleaved by score.
    combined = sort_evidence(stack_items + github_items)

    return combined[: settings.MAX_RETRIEVED_ITEMS]


def should_use_github(request: CodeXRequest) -> bool:
    """Decide whether a GitHub search should be run for *request*.

    Returns ``False`` when ``settings.ENABLE_GITHUB_SEARCH`` is off.
    Otherwise returns ``True`` if the request's framework or error message
    contains any non-whitespace text, and ``False`` if both are blank.
    """
    if not settings.ENABLE_GITHUB_SEARCH:
        return False

    # The generator is lazy: error_message is only read when framework
    # turns out to be blank.
    return any(
        (getattr(request, field_name) or "").strip()
        for field_name in ("framework", "error_message")
    )


def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
    """Collect, de-duplicate, rank and trim evidence for a fix request.

    Returns an empty list for any task type other than
    ``CodeTaskType.FIX``. Otherwise Stack Overflow is searched when
    ``settings.ENABLE_STACK_SEARCH`` is on, and GitHub is searched when
    ``should_use_github`` approves the request. The combined results are
    passed through ``deduplicate_evidence``, ``sort_evidence`` and
    ``trim_evidence`` in that order.
    """
    if task_type != CodeTaskType.FIX:
        return []

    def shared_query() -> dict:
        # Fields sent to both search backends; built on demand so the
        # request is only read when a search actually runs.
        return {
            "message": request.message,
            "error_message": request.error_message,
            "language": request.language,
            "framework": request.framework,
        }

    gathered: List[RetrievedEvidence] = []

    if settings.ENABLE_STACK_SEARCH:
        # Only the Stack Overflow search also receives the code snippet.
        gathered += search_stackoverflow(
            **shared_query(),
            code=request.code,
            max_results=settings.MAX_STACK_RESULTS,
        )

    if should_use_github(request):
        gathered += search_github(
            **shared_query(),
            max_results=settings.MAX_GITHUB_RESULTS,
        )

    return trim_evidence(sort_evidence(deduplicate_evidence(gathered)))