serverdaun committed on
Commit
04c9c28
·
1 Parent(s): d0ec9a4

add agent back-end

Browse files
Files changed (1) hide show
  1. sidekick.py +257 -0
sidekick.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import uuid
3
+ from datetime import datetime
4
+ from typing import Annotated, Any, List, Literal, Optional
5
+
6
+ from dotenv import load_dotenv
7
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
8
+ from langchain_openai import ChatOpenAI
9
+ from langgraph.checkpoint.memory import MemorySaver
10
+ from langgraph.graph import END, START, StateGraph
11
+ from langgraph.graph.message import add_messages
12
+ from langgraph.prebuilt import ToolNode
13
+ from pydantic import BaseModel, Field
14
+
15
+ from tools import get_all_tools_with_browser
16
+
17
+ load_dotenv(override=True)
18
+
19
+
20
class State(BaseModel):
    """Graph state shared by the worker/evaluator loop of the Sidekick.

    `messages` uses LangGraph's `add_messages` reducer, so node return values
    are merged/appended into the history rather than replacing it.
    """

    # Full conversation history; add_messages merges each node's partial update.
    messages: Annotated[List[Any], add_messages]
    # What the assistant must achieve for the run to be considered done.
    success_criteria: str
    # Evaluator feedback from a rejected attempt; None until the first rejection.
    feedback_on_work: Optional[str] = None
    # Set by the evaluator once the success criteria are satisfied.
    success_criteria_met: bool = False
    # Set by the evaluator when the user must clarify or unblock the task.
    user_input_needed: bool = False
28
+
29
+
30
class EvaluatorOutput(BaseModel):
    """Structured verdict produced by the evaluator LLM for one worker reply."""

    # Free-text assessment; fed back to the worker when the reply is rejected.
    feedback: str = Field(description="Feedback on the assistant's response")
    # Pass/fail decision used to route the graph to END or back to the worker.
    success_criteria_met: bool = Field(description="Whether the success criteria have been met")
    # Signals that the loop should stop and hand control back to the user.
    user_input_needed: bool = Field(description="True if more input is needed from the user, or clarifications, or the assistant is stuck")
40
+
41
+
42
class Sidekick:
    """LangGraph agent that loops a tool-using worker against an evaluator."""

    def __init__(self):
        # LLM handles are bound lazily in setup(); None until then.
        self.worker_llm_with_tools = None
        self.evaluator_llm_with_output = None
        self.llm_with_tools = None
        # Tool set and compiled graph are also created in setup().
        self.tools = None
        self.graph = None
        # Browser resources owned by this instance; released in cleanup().
        self.browser = None
        self.playwright = None
        # One thread id per Sidekick so checkpointed runs do not collide.
        self.sidekick_id = str(uuid.uuid4())
        self.memory = MemorySaver()
55
+
56
+ async def setup(self) -> None:
57
+ """Setup function"""
58
+
59
+ self.tools, self.browser, self.playwright = await get_all_tools_with_browser()
60
+ worker_llm = ChatOpenAI(model="gpt-4o-mini")
61
+ self.worker_llm_with_tools = worker_llm.bind_tools(self.tools)
62
+ evaluator_llm = ChatOpenAI(model="gpt-4o-mini")
63
+ self.evaluator_llm_with_output = evaluator_llm.with_structured_output(
64
+ EvaluatorOutput
65
+ )
66
+ await self.build_graph()
67
+
68
+ async def worker(self, state: State) -> State:
69
+ """Worker function"""
70
+
71
+ system_message = f"""You are a helpful assistant that can use tools to complete tasks.
72
+ You keep working on a task until either you have a question or clarification for the user, or the success criteria is met.
73
+ You have many tools to help you, including tools to browse the internet, navigating and retrieving web pages.
74
+ You have a tool to run python code, but note that you would need to include a print() statement if you wanted to receive output.
75
+ The current date and time is {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
76
+
77
+ This is the success criteria:
78
+ {state.success_criteria}
79
+ You should reply either with a question for the user about this assignment, or with your final response.
80
+ If you have a question for the user, you need to reply by clearly stating your question. An example might be:
81
+
82
+ Question: please clarify whether you want a summary or a detailed answer
83
+
84
+ If you've finished, reply with the final answer, and don't ask a question; simply reply with the answer.
85
+ """
86
+
87
+ if state.feedback_on_work:
88
+ system_message += f"""
89
+ Previously you thought you completed the assignment, but your reply was rejected because the success criteria was not met.
90
+ Here is the feedback on why this was rejected:
91
+ {state.feedback_on_work}
92
+ With this feedback, please continue the assignment, ensuring that you meet the success criteria or have a question for the user."""
93
+
94
+ # Add in the system message
95
+ found_system_message = False
96
+ messages = state.messages
97
+ for message in messages:
98
+ if isinstance(message, SystemMessage):
99
+ message.content = system_message
100
+ found_system_message = True
101
+
102
+ if not found_system_message:
103
+ messages = [SystemMessage(content=system_message)] + messages
104
+
105
+ # Invoke the LLM with tools (async)
106
+ response = await self.worker_llm_with_tools.ainvoke(messages)
107
+
108
+ # Return updated state
109
+ return {
110
+ "messages": [response],
111
+ }
112
+
113
+ def worker_router(self, state: State) -> Literal["tools", "evaluator"]:
114
+ """Worker router function"""
115
+
116
+ last_message = state.messages[-1]
117
+
118
+ if hasattr(last_message, "tool_calls") and last_message.tool_calls:
119
+ return "tools"
120
+ else:
121
+ return "evaluator"
122
+
123
+ def format_conversation(self, messages: List[Any]) -> str:
124
+ """Format conversation function"""
125
+
126
+ conversation = "Conversation history:\n\n"
127
+ for message in messages:
128
+ if isinstance(message, HumanMessage):
129
+ conversation += f"User: {message.content}\n"
130
+ elif isinstance(message, AIMessage):
131
+ text = message.content or "[Tools use]"
132
+ conversation += f"Assistant: {text}\n"
133
+ return conversation
134
+
135
+ async def evaluator(self, state: State) -> State:
136
+ """Evaluator function"""
137
+
138
+ last_response = state.messages[-1].content
139
+
140
+ system_message = f"""You are an evaluator that determines if a task has been completed successfully by an Assistant.
141
+ Assess the Assistant's last response based on the given criteria. Respond with your feedback, and with your decision on whether the success criteria has been met,
142
+ and whether more input is needed from the user."""
143
+
144
+ user_message = f"""You are evaluating a conversation between the User and Assistant. You decide what action to take based on the last response from the Assistant.
145
+
146
+ The entire conversation with the assistant, with the user's original request and all replies, is:
147
+ {self.format_conversation(state.messages)}
148
+
149
+ The success criteria for this assignment is:
150
+ {state.success_criteria}
151
+
152
+ And the final response from the Assistant that you are evaluating is:
153
+ {last_response}
154
+
155
+ Respond with your feedback, and decide if the success criteria is met by this response.
156
+ Also, decide if more user input is required, either because the assistant has a question, needs clarification, or seems to be stuck and unable to answer without help.
157
+
158
+ The Assistant has access to a tool to write files. If the Assistant says they have written a file, then you can assume they have done so.
159
+ Overall you should give the Assistant the benefit of the doubt if they say they've done something. But you should reject if you feel that more work should go into this.
160
+
161
+ """
162
+ if state.feedback_on_work:
163
+ user_message += f"""Also, note that in a prior attempt from the Assistant, you provided this feedback: {state.feedback_on_work}
164
+
165
+ If you're seeing the Assistant repeating the same mistakes, then consider responding that user input is required."""
166
+
167
+ evaluator_messages = [
168
+ SystemMessage(content=system_message),
169
+ HumanMessage(content=user_message),
170
+ ]
171
+
172
+ eval_result = await self.evaluator_llm_with_output.ainvoke(evaluator_messages)
173
+ new_state = {
174
+ "messages": [
175
+ AIMessage(
176
+ content=f"Evaluator Feedback on this answer: {eval_result.feedback}"
177
+ )
178
+ ],
179
+ "feedback_on_work": eval_result.feedback,
180
+ "success_criteria_met": eval_result.success_criteria_met,
181
+ "user_input_needed": eval_result.user_input_needed,
182
+ }
183
+ return new_state
184
+
185
+ def route_based_on_evaluation(self, state: State) -> Literal["worker", "END"]:
186
+ """Route based on evaluation function"""
187
+
188
+ if state.success_criteria_met or state.user_input_needed:
189
+ return "END"
190
+ else:
191
+ return "worker"
192
+
193
+ async def build_graph(self) -> None:
194
+ """Build the graph"""
195
+
196
+ # Set up Graph Builder with State
197
+ graph_builder = StateGraph(State)
198
+
199
+ # Add nodes
200
+ graph_builder.add_node("worker", self.worker)
201
+ graph_builder.add_node("tools", ToolNode(tools=self.tools))
202
+ graph_builder.add_node("evaluator", self.evaluator)
203
+
204
+ # Add edges
205
+ graph_builder.add_conditional_edges(
206
+ "worker", self.worker_router, {"tools": "tools", "evaluator": "evaluator"}
207
+ )
208
+ graph_builder.add_edge("tools", "worker")
209
+ graph_builder.add_conditional_edges(
210
+ "evaluator",
211
+ self.route_based_on_evaluation,
212
+ {"worker": "worker", "END": END},
213
+ )
214
+ graph_builder.add_edge(START, "worker")
215
+
216
+ # Compile the graph
217
+ self.graph = graph_builder.compile(checkpointer=self.memory)
218
+
219
+ async def run_superstep(
220
+ self, message: str, success_criteria: str, history: List[Any]
221
+ ) -> List[Any]:
222
+ """Run a superstep"""
223
+
224
+ config = {"configurable": {"thread_id": self.sidekick_id}}
225
+
226
+ state = {
227
+ "messages": [HumanMessage(content=message)],
228
+ "success_criteria": success_criteria
229
+ or "The answer should be clear and accurate",
230
+ "feedback_on_work": None,
231
+ "success_criteria_met": False,
232
+ "user_input_needed": False,
233
+ }
234
+ result = await self.graph.ainvoke(state, config=config)
235
+ user = {"role": "user", "content": message}
236
+ reply = {"role": "assistant", "content": result["messages"][-2].content}
237
+ feedback = {"role": "assistant", "content": result["messages"][-1].content}
238
+ return history + [user, reply, feedback]
239
+
240
+ def cleanup(self) -> None:
241
+ """Cleanup function"""
242
+
243
+ if self.browser:
244
+ try:
245
+ loop = asyncio.get_running_loop()
246
+ loop.create_task(self.browser.close())
247
+ if self.playwright:
248
+ loop.create_task(self.playwright.stop())
249
+ except RuntimeError:
250
+ # If no loop is running, do a direct run
251
+ asyncio.run(self.browser.close())
252
+ if self.playwright:
253
+ asyncio.run(self.playwright.stop())
254
+
255
+ def free_resources(self) -> None:
256
+ """Public alias for cleanup so external callers can reliably release resources."""
257
+ self.cleanup()