File size: 3,350 Bytes
f736041
 
 
02aebba
 
 
 
 
249284d
 
 
 
 
 
02aebba
 
249284d
 
 
 
 
 
 
 
 
 
02aebba
37e55ed
 
 
 
 
 
 
f736041
 
 
 
02aebba
f736041
37e55ed
 
 
 
f736041
 
62eba67
 
 
 
 
 
249284d
 
 
02aebba
 
37e55ed
 
f736041
62eba67
f736041
 
 
02aebba
 
 
 
249284d
 
 
02aebba
249284d
37e55ed
 
 
 
f736041
 
249284d
 
 
 
 
 
 
 
 
02aebba
 
37e55ed
 
f736041
 
 
 
02aebba
 
 
 
249284d
 
 
02aebba
249284d
37e55ed
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from litellm import completion


def _completion_kwargs(
    api_base: str | None,
    api_key: str | None,
    temperature: float | None,
) -> dict:
    """Build kwargs for litellm.completion from api settings."""
    kwargs: dict = {}
    if api_base:
        kwargs["api_base"] = api_base
    if api_key:
        kwargs["api_key"] = api_key
    if temperature is not None:
        kwargs["temperature"] = temperature
    return kwargs


def generate_players(
    instruction: str,
    n: int,
    model: str = "gpt-4o-mini",
    *,
    api_base: str | None = None,
    api_key: str | None = None,
    temperature: float | None = None,
    return_usage: bool = False,
) -> list[str] | tuple[list[str], object]:
    """Request ``n`` completions for the instruction using the given model.

    The instruction is sent as a single ``user`` message. Each choice in the
    response becomes one "player" string with surrounding whitespace
    stripped. A choice whose message ``content`` is ``None`` is returned as
    an empty string instead of raising ``AttributeError``.

    Args:
        instruction: Text sent as the user message.
        n: Passed through as the ``n`` argument of ``litellm.completion``.
        model: Model identifier passed to ``litellm.completion``.
        api_base: Optional API base, forwarded via ``_completion_kwargs``.
        api_key: Optional API key, forwarded via ``_completion_kwargs``.
        temperature: Optional sampling temperature, forwarded via
            ``_completion_kwargs``.
        return_usage: When ``True``, also return the response's ``usage``
            attribute (``None`` if the response has no such attribute).

    Returns:
        The list of player strings, or a ``(players, usage)`` tuple when
        ``return_usage`` is ``True``.
    """
    response = completion(
        model=model,
        messages=[{"role": "user", "content": instruction}],
        n=n,
        **_completion_kwargs(api_base, api_key, temperature),
    )
    # ``content`` is nullable in the chat-completions schema; normalise it to
    # "" so one empty choice does not abort the whole batch on ``.strip()``.
    players = [(c.message.content or "").strip() for c in response.choices]
    if return_usage:
        return players, getattr(response, "usage", None)
    return players


def prompt_score(
    instruction: str,
    criteria_list: list[str],
    criteria_block: str,
    player: str,
    model: str = "gpt-4o-mini",
    *,
    api_base: str | None = None,
    api_key: str | None = None,
    temperature: float | None = None,
    include_instruction: bool = True,
    return_usage: bool = False,
) -> str | tuple[str, object]:
    """Return a JSON score string evaluating `player` on the criteria.

    The prompt contains ``criteria_block``, an example of the expected JSON
    shape with one ``1-10`` placeholder per entry in ``criteria_list`` (a
    single placeholder when the list is empty), optionally the instruction,
    and finally the output under evaluation. It is sent as a single
    ``system`` message. The reply is returned as raw text; it is not parsed
    or validated here.

    Args:
        instruction: The original instruction; only included in the prompt
            when ``include_instruction`` is ``True``.
        criteria_list: Only its length is used, to size the example score
            list.
        criteria_block: Criteria text inserted verbatim into the prompt.
        player: The output to evaluate.
        model: Model identifier passed to ``litellm.completion``.
        api_base: Optional API base, forwarded via ``_completion_kwargs``.
        api_key: Optional API key, forwarded via ``_completion_kwargs``.
        temperature: Optional sampling temperature, forwarded via
            ``_completion_kwargs``.
        include_instruction: Whether to append the instruction to the prompt.
        return_usage: When ``True``, also return the response's ``usage``
            attribute (``None`` if the response has no such attribute).

    Returns:
        The stripped reply text of the first choice (``""`` when the reply
        content is ``None``), or a ``(text, usage)`` tuple when
        ``return_usage`` is ``True``.
    """
    # ``or "1-10"`` keeps the example non-empty when no criteria are listed.
    example_scores = ", ".join(["1-10"] * len(criteria_list)) or "1-10"
    prompt = f"""Evaluate the output below on the following criteria:
{criteria_block}

Return JSON exactly like: {{"scores": [{example_scores}]}}."""
    if include_instruction:
        prompt += f"\n\nInstruction:\n{instruction}"
    prompt += f"\n\nOutput:\n{player}"
    response = completion(
        model=model,
        messages=[{"role": "system", "content": prompt}],
        **_completion_kwargs(api_base, api_key, temperature),
    )
    # ``content`` is nullable in the chat-completions schema; fall back to ""
    # so a null reply surfaces as an empty (unparseable) score string rather
    # than an AttributeError on ``.strip()``.
    text = (response.choices[0].message.content or "").strip()
    if return_usage:
        return text, getattr(response, "usage", None)
    return text


def prompt_pairwise(
    instruction: str,
    criteria_block: str,
    a: str,
    b: str,
    model: str = "gpt-4o-mini",
    *,
    api_base: str | None = None,
    api_key: str | None = None,
    temperature: float | None = None,
    include_instruction: bool = True,
    return_usage: bool = False,
) -> str | tuple[str, object]:
    """Return which player wins in JSON using the given criteria.

    The prompt contains ``criteria_block``, the required JSON reply shape,
    optionally the instruction, and the two candidates wrapped in ``<A>`` and
    ``<B>`` tags. It is sent as a single ``system`` message. The reply is
    returned as raw text; it is not parsed or validated here.

    Args:
        instruction: The original instruction; only included in the prompt
            when ``include_instruction`` is ``True``.
        criteria_block: Criteria text inserted verbatim into the prompt.
        a: Text of player A.
        b: Text of player B.
        model: Model identifier passed to ``litellm.completion``.
        api_base: Optional API base, forwarded via ``_completion_kwargs``.
        api_key: Optional API key, forwarded via ``_completion_kwargs``.
        temperature: Optional sampling temperature, forwarded via
            ``_completion_kwargs``.
        include_instruction: Whether to append the instruction to the prompt.
        return_usage: When ``True``, also return the response's ``usage``
            attribute (``None`` if the response has no such attribute).

    Returns:
        The stripped reply text of the first choice (``""`` when the reply
        content is ``None``), or a ``(text, usage)`` tuple when
        ``return_usage`` is ``True``.
    """
    prompt = f"""Compare the two players below using:
{criteria_block}

Return ONLY JSON {{"winner": "A"}} or {{"winner": "B"}}."""
    if include_instruction:
        prompt += f"\n\nInstruction:\n{instruction}"
    prompt += f"\n\nPlayers:\n<A>{a}</A>\n<B>{b}</B>"
    response = completion(
        model=model,
        messages=[{"role": "system", "content": prompt}],
        **_completion_kwargs(api_base, api_key, temperature),
    )
    # ``content`` is nullable in the chat-completions schema; fall back to ""
    # so a null reply surfaces as an empty (unparseable) verdict rather than
    # an AttributeError on ``.strip()``.
    text = (response.choices[0].message.content or "").strip()
    if return_usage:
        return text, getattr(response, "usage", None)
    return text