File size: 4,093 Bytes
049f60c
 
 
 
1ea7c95
049f60c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ea7c95
 
 
 
 
 
 
 
 
 
 
 
 
049f60c
 
 
1ea7c95
049f60c
 
 
 
1ea7c95
 
 
4027d07
 
 
 
1ea7c95
 
4027d07
1ea7c95
4027d07
1ea7c95
 
 
 
 
4027d07
 
 
 
 
 
 
 
 
 
 
 
1ea7c95
 
 
4027d07
 
 
 
 
 
1ea7c95
4027d07
 
 
 
 
1ea7c95
 
 
 
049f60c
4027d07
 
 
 
 
 
 
049f60c
 
 
 
4027d07
049f60c
 
 
 
 
1ea7c95
049f60c
 
 
 
4027d07
049f60c
 
 
 
 
 
 
 
 
 
 
1ea7c95
049f60c
1ea7c95
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from typing import List

from github_search import search_github
from stack_search import search_stackoverflow
from schemas import CodeTaskType, CodeXRequest, RetrievedEvidence, SourceType
from config import settings


def normalize_text(text: str) -> str:
    """Return *text* stripped of surrounding whitespace and lower-cased.

    Any falsy input (for example ``None`` or ``""``) yields an empty string.
    """
    if not text:
        return ""
    trimmed = text.strip()
    return trimmed.lower()


def deduplicate_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Drop repeated evidence entries, keeping the first occurrence of each.

    Two entries are duplicates when their normalized title, their normalized
    URL (a missing URL is treated as an empty string) and the value of their
    source type all match. The relative order of the kept entries follows the
    input order.
    """
    # dicts preserve insertion order, so the values come back in the order
    # each fingerprint was first seen.
    first_by_fingerprint = {}

    for entry in evidence_list:
        fingerprint = (
            normalize_text(entry.title),
            normalize_text(entry.url or ""),
            entry.source_type.value,
        )
        # setdefault only stores the entry when the fingerprint is new.
        first_by_fingerprint.setdefault(fingerprint, entry)

    return list(first_by_fingerprint.values())


def source_priority(source_type: SourceType) -> int:
    """Return the ranking bonus for a source type.

    Stack Overflow maps to 2, GitHub to 1, and every other source to 0.
    """
    # Checked in order with ``==`` so the first matching source wins.
    bonus_table = (
        (SourceType.STACKOVERFLOW, 2),
        (SourceType.GITHUB, 1),
    )
    for known_source, bonus in bonus_table:
        if source_type == known_source:
            return bonus
    return 0


def score_evidence(item: RetrievedEvidence) -> float:
    """Return the ranking score of *item*.

    The result is the item's own score (a missing score counts as 0.0) plus
    the priority bonus of its source type.
    """
    raw_score = item.score
    if raw_score is None:
        raw_score = 0.0
    return raw_score + source_priority(item.source_type)


def sort_evidence(evidence_list: List[RetrievedEvidence]) -> List[RetrievedEvidence]:
    """Return a new list of the evidence ordered by descending ranking score.

    The input is not modified; scores come from ``score_evidence``.
    """
    ranked = list(evidence_list)
    ranked.sort(key=score_evidence, reverse=True)
    return ranked


def trim_evidence(
    evidence_list: List[RetrievedEvidence],
    *,
    per_source_limit: int = 3,
) -> List[RetrievedEvidence]:
    """Cap the evidence per source and overall, returning it in ranked order.

    The evidence is ranked first, so the per-source cap keeps the
    highest-scoring Stack Overflow and GitHub items rather than whichever
    happened to come first in the input. Items from any other source are not
    capped per source. The combined result is re-ranked and truncated to
    ``settings.MAX_RETRIEVED_ITEMS``.

    Args:
        evidence_list: Evidence to trim; it does not need to be pre-sorted.
        per_source_limit: Maximum number of Stack Overflow items and, separately,
            of GitHub items to keep. Negative values are treated as 0.

    Returns:
        A new list ordered by descending ranking score.
    """
    # A negative slice bound would drop items from the end instead of capping.
    cap = max(per_source_limit, 0)

    # Rank before capping so each source keeps its best items.
    ranked = sort_evidence(evidence_list)

    stack_items: List[RetrievedEvidence] = []
    github_items: List[RetrievedEvidence] = []
    other_items: List[RetrievedEvidence] = []
    for item in ranked:
        if item.source_type == SourceType.STACKOVERFLOW:
            stack_items.append(item)
        elif item.source_type == SourceType.GITHUB:
            github_items.append(item)
        else:
            other_items.append(item)

    combined = stack_items[:cap] + github_items[:cap] + other_items

    # Grouping by source broke the global order; restore it before truncating.
    combined = sort_evidence(combined)

    return combined[: settings.MAX_RETRIEVED_ITEMS]


def should_use_stack(task_type: CodeTaskType) -> bool:
    """Tell whether Stack Overflow search should run for *task_type*.

    Returns ``False`` when ``settings.ENABLE_STACK_SEARCH`` is off; otherwise
    returns whether the task is FIX, REVIEW or REFACTOR.
    """
    stack_tasks = {
        CodeTaskType.FIX,
        CodeTaskType.REVIEW,
        CodeTaskType.REFACTOR,
    }

    if settings.ENABLE_STACK_SEARCH:
        return task_type in stack_tasks

    return False


def should_use_github(task_type: CodeTaskType, request: CodeXRequest) -> bool:
    """Tell whether GitHub search should run for this task and request.

    Returns ``False`` when ``settings.ENABLE_GITHUB_SEARCH`` is off. For FIX
    tasks the request must carry a non-blank framework, error message or
    language; for REVIEW and REFACTOR tasks a non-blank framework or language.
    Any other task type yields ``False``.
    """

    def has_text(value) -> bool:
        # True only for a value that is truthy and not just whitespace.
        return bool(value and value.strip())

    if not settings.ENABLE_GITHUB_SEARCH:
        return False

    if task_type == CodeTaskType.FIX:
        return (
            has_text(request.framework)
            or has_text(request.error_message)
            or has_text(request.language)
        )

    if task_type in {CodeTaskType.REVIEW, CodeTaskType.REFACTOR}:
        return has_text(request.framework) or has_text(request.language)

    return False


def retrieve_code_evidence(task_type: CodeTaskType, request: CodeXRequest) -> List[RetrievedEvidence]:
    """Gather, de-duplicate, rank and trim external evidence for a request.

    Only FIX, REVIEW and REFACTOR tasks are supported; any other task type
    returns an empty list without searching. Stack Overflow is queried when
    ``should_use_stack`` allows it and GitHub when ``should_use_github``
    allows it, with Stack Overflow results collected first.
    """
    if task_type not in {
        CodeTaskType.FIX,
        CodeTaskType.REVIEW,
        CodeTaskType.REFACTOR,
    }:
        return []

    gathered: List[RetrievedEvidence] = []

    if should_use_stack(task_type):
        # Only the Stack Overflow search receives the request's code.
        gathered += search_stackoverflow(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            code=request.code,
            max_results=settings.MAX_STACK_RESULTS,
        )

    if should_use_github(task_type, request):
        gathered += search_github(
            message=request.message,
            error_message=request.error_message,
            language=request.language,
            framework=request.framework,
            max_results=settings.MAX_GITHUB_RESULTS,
        )

    # Pipeline: drop duplicates, rank by score, then apply the size caps.
    return trim_evidence(sort_evidence(deduplicate_evidence(gathered)))