from __future__ import annotations
import asyncio
from dataclasses import dataclass
from typing import Literal
import os

import dotenv

# Load the OpenAI API key from a local .env file into the environment.
dotenv.load_dotenv()
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY is not set; add it to the environment or a .env file.")
from agents import Agent, ItemHelpers, Runner, TResponseInputItem, trace

"""
This example shows the LLM-as-a-judge pattern. The first agent generates an outline for a
story. The second agent judges the outline and provides feedback. We loop until the judge
is satisfied with the outline.
"""

story_outline_generator = Agent(
    name="story_outline_generator",
    instructions=(
        "You generate a very short story outline based on the user's input. "
        "If there is any feedback provided, use it to improve the outline."
    ),
)

@dataclass
class EvaluationFeedback:
    feedback: str
    score: Literal["pass", "needs_improvement", "fail"]

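# The SDK parses the evaluator's reply into this dataclass via
# output_type=EvaluationFeedback below, so `result.score` comes back as one of
# the three literals rather than as free-form text.
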
evaluator = Agent[None](
    name="evaluator",
    instructions=(
        "You evaluate a story outline and decide if it's good enough. "
        "If it's not good enough, you provide feedback on what needs to be improved. "
        "Never give it a pass on the first try. After 5 attempts, you can give it a "
        "pass if the story outline is good enough - do not go for perfection."
    ),
    output_type=EvaluationFeedback,
)

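# Agent[None] fixes the agent's generic context type: this example never passes
# a shared context object to Runner.run, so the context type is None.
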
async def main() -> None:
    msg = input("What kind of story would you like to hear? ")
    input_items: list[TResponseInputItem] = [{"content": msg, "role": "user"}]

    latest_outline: str | None = None

    # We'll run the entire workflow in a single trace
    with trace("LLM as a judge"):
        while True:
            story_outline_result = await Runner.run(
                story_outline_generator,
                input_items,
            )

            input_items = story_outline_result.to_input_list()
            latest_outline = ItemHelpers.text_message_outputs(story_outline_result.new_items)
            print("Story outline generated")

            evaluator_result = await Runner.run(evaluator, input_items)
            result: EvaluationFeedback = evaluator_result.final_output
            print(f"Evaluator score: {result.score}")

            if result.score == "pass":
                print("Story outline is good enough, exiting.")
                break

            print("Re-running with feedback")
            input_items.append({"content": f"Feedback: {result.feedback}", "role": "user"})

    print(f"Final story outline: {latest_outline}")


if __name__ == "__main__":
    asyncio.run(main())
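
# Example usage (assuming this file is saved as llm_as_a_judge.py):
#
#     python llm_as_a_judge.py
#
# The script asks "What kind of story would you like to hear?", then alternates
# generator and evaluator runs until the judge returns a "pass" score, at which
# point the final outline is printed.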