File size: 4,693 Bytes
f736041
 
 
02aebba
 
 
 
 
249284d
 
 
 
 
 
02aebba
 
249284d
 
 
 
 
 
 
 
 
 
02aebba
3404ee0
37e55ed
 
 
 
 
 
 
3404ee0
 
e4a181a
f736041
 
3404ee0
f736041
3404ee0
f736041
37e55ed
 
 
 
f736041
 
62eba67
 
 
 
 
 
249284d
 
 
02aebba
 
3404ee0
fa5956e
37e55ed
 
295a884
e4a181a
f736041
 
 
fa5956e
295a884
fa5956e
46c7455
 
 
e52ae9a
fa5956e
295a884
e52ae9a
fa5956e
02aebba
 
fa5956e
02aebba
3404ee0
e4a181a
249284d
 
 
3404ee0
249284d
37e55ed
 
 
 
f736041
 
249284d
 
 
 
 
 
 
 
 
02aebba
 
3404ee0
fa5956e
37e55ed
 
295a884
f736041
 
 
fa5956e
 
295a884
fa5956e
295a884
46c7455
 
 
295a884
 
fa5956e
295a884
 
 
 
fa5956e
02aebba
 
 
3404ee0
e4a181a
249284d
 
 
3404ee0
249284d
37e55ed
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
from litellm import completion


def _completion_kwargs(
    api_base: str | None,
    api_key: str | None,
    temperature: float | None,
) -> dict:
    """Build kwargs for litellm.completion from api settings."""
    kwargs: dict = {}
    if api_base:
        kwargs["api_base"] = api_base
    if api_key:
        kwargs["api_key"] = api_key
    if temperature is not None:
        kwargs["temperature"] = temperature
    return kwargs


def generate_players(
    instruction: str,
    n: int,
    model: str = "gpt-4o-mini",
    *,
    api_base: str | None = None,
    api_key: str | None = None,
    temperature: float | None = None,
    thinking: bool = False,
    return_usage: bool = False,
) -> list[str] | tuple[list[str], object]:
    """Request ``n`` completions for ``instruction`` using ``model``.

    Parameters
    ----------
    instruction:
        Prompt text, sent as a single ``user`` message.
    n:
        Number of completions requested in one API call.
    model:
        Model identifier passed through to ``litellm.completion``.
    api_base, api_key, temperature:
        Optional API settings, forwarded via :func:`_completion_kwargs`.
    thinking:
        Currently unused; a previous revision forwarded it as
        ``chat_template_kwargs["enable_thinking"]`` — confirm backend
        support before re-enabling.
    return_usage:
        When ``True``, also return the response's ``usage`` object
        (``None`` when the backend provides none).

    Returns
    -------
    A list of stripped completion texts, or a ``(players, usage)`` tuple
    when ``return_usage`` is ``True``.
    """
    messages = [{"role": "user", "content": instruction}]
    kwargs = _completion_kwargs(api_base, api_key, temperature)
    response = completion(
        model=model,
        messages=messages,
        n=n,
        **kwargs,
    )
    # Guard against choices whose message content is None (e.g. empty or
    # tool-call-only responses); calling ``.strip()`` on None would raise.
    players = [(c.message.content or "").strip() for c in response.choices]
    if return_usage:
        return players, getattr(response, "usage", None)
    return players


def prompt_score(
    instruction: str,
    criteria_list: list[str],
    criteria_block: str,
    player: str,
    model: str = "gpt-4o-mini",
    *,
    api_base: str | None = None,
    api_key: str | None = None,
    temperature: float | None = None,
    include_instruction: bool = True,
    thinking: bool = False,
    explain: bool = False,
    return_usage: bool = False,
) -> str | tuple[str, object]:
    """Ask the judge model to score ``player`` against the criteria.

    The prompt instructs the model to answer in plain text ending with a
    ``Final verdict:`` line listing one 1-10 score per criterion; with
    ``explain=True`` a ``Reasons:`` section is requested first. Returns the
    raw model text, plus the ``usage`` object when ``return_usage`` is set.
    Note: ``thinking`` is currently unused (a prior revision forwarded it
    via ``chat_template_kwargs``).
    """
    # One "5" placeholder per criterion so the example matches the expected
    # answer length; fall back to a single "5" for an empty criteria list.
    example_scores = ", ".join(["5"] * len(criteria_list)) or "5"
    verdict_line = f"Final verdict: <list of each criteria score in range 1-10> (e.g. [{example_scores}])"

    prompt = f"""Evaluate the output below on the following criteria:
{criteria_block}

"""
    if explain:
        prompt += (
            "Provide detailed reasons in English.\n"
            "Respond in plain text with two sections in following format:\n"
            "Reasons:\n<explain your reasoning in each criteria before write final score>\n\n\n"
            + verdict_line
        )
    else:
        prompt += "Respond in plain text exactly like:\n" + verdict_line

    if include_instruction:
        prompt += f"\n\nInstruction:\n{instruction}"
    prompt += f"\n\nOutput:\n{player}"

    extra = _completion_kwargs(api_base, api_key, temperature)
    response = completion(
        model=model,
        messages=[{"role": "system", "content": prompt}],
        **extra,
    )
    verdict = response.choices[0].message.content.strip()
    if return_usage:
        return verdict, getattr(response, "usage", None)
    return verdict


def prompt_pairwise(
    instruction: str,
    criteria_block: str,
    a: str,
    b: str,
    model: str = "gpt-4o-mini",
    *,
    api_base: str | None = None,
    api_key: str | None = None,
    temperature: float | None = None,
    include_instruction: bool = True,
    thinking: bool = False,
    explain: bool = False,
    return_usage: bool = False,
) -> str | tuple[str, object]:
    """Ask the judge model which of two players wins under the criteria.

    Players ``a`` and ``b`` are wrapped in ``<A>``/``<B>`` tags and the
    model is told to end with ``Final verdict: A`` or ``Final verdict: B``;
    with ``explain=True`` a ``Reasons:`` section is requested first. Returns
    the raw model text, plus the ``usage`` object when ``return_usage`` is
    set. Note: ``thinking`` is currently unused (a prior revision forwarded
    it via ``chat_template_kwargs``).
    """
    verdict_example = "Final verdict: A or Final verdict: B"

    sections = [f"""Compare the two players below using:
{criteria_block}

"""]
    if explain:
        sections.append(
            "Provide detailed reasons in English.\n"
            "Respond in plain text with two sections in following format:\n"
            "Reasons:\n<explain your reasoning in each criteria before write final verdict>\n\n\n"
            f"{verdict_example}"
        )
    else:
        sections.append(f"Respond in plain text exactly like:\n{verdict_example}")
    if include_instruction:
        sections.append(f"\n\nInstruction:\n{instruction}")
    sections.append(f"\n\nPlayers:\n<A>{a}</A>\n<B>{b}</B>")
    prompt = "".join(sections)

    extra = _completion_kwargs(api_base, api_key, temperature)
    response = completion(
        model=model,
        messages=[{"role": "system", "content": prompt}],
        **extra,
    )
    verdict = response.choices[0].message.content.strip()
    if return_usage:
        return verdict, getattr(response, "usage", None)
    return verdict