# --- Hugging Face file-viewer header (not code) ---
#   Paperbag's picture
#   trying to add thought process
#   commit 9df4794 | raw | history | blame | 8.15 kB
import os
from typing import TypedDict, List, Dict, Any, Optional, Union

from ddgs import DDGS
from dotenv import load_dotenv
from groq import Groq
from langchain_community.document_loaders.image import UnstructuredImageLoader
from langchain_core import tools
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
from langchain_core.tools import tool
from langchain_groq import ChatGroq
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline
from langgraph.graph import StateGraph, START, END
load_dotenv()
# Base Hugging Face LLM used by the chat wrapper
# base_llm = HuggingFaceEndpoint(
# repo_id="openai/gpt-oss-20b:hyperbolic",
# # deepseek-ai/DeepSeek-OCR:novita
# task="text-generation",
# temperature=0.0,
# huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
# )
model = ChatGroq(
model="meta-llama/llama-4-scout-17b-16e-instruct",
temperature=0,
max_tokens=None,
timeout=None,
max_retries=2,
# other params...
)
@tool
def web_search(keywords: str, max_results: int = 5) -> str:
    """
    Uses duckduckgo to search the web
    Use cases:
    - Identify personal information
    - Information search
    - Finding organisation information
    - Obtain the latest news
    Args:
        keywords: keywords used to search the web
        max_results: number of results to show after searching the web, defaults to 5
    Returns:
        Search result (Header + body + url)
    """
    # DDGS is a context manager; ensure the underlying session is closed.
    with DDGS() as ddgs:
        results = ddgs.text(keywords, max_results=max_results)
        # Single join instead of repeated `+=` string concatenation
        # (same output format: title, body, url, blank line per hit).
        return "".join(
            f"Results: {hit['title']}\n{hit['body']}\n{hit['href']}\n\n"
            for hit in results
        )
# @tool
# def get_image_file(task_id):
# """
# Get the image file from the question
# Use cases:
# - Extract Image from the question
# Args:
# task_id: the task_id of the question
# Returns:
# Image file result
# """
# loader = UnstructuredImageLoader("./example_data/layout-parser-paper-screenshot.png")
# data = loader.load()
# data[0]
# return ''
class AgentState(TypedDict):
    """Shared LangGraph state: the running conversation for one question."""
    # Full message history; the last entry is the most recent turn
    # (user question, model reply, or tool result).
    messages: List[Union[HumanMessage, AIMessage, SystemMessage]]
def read_message(state: "AgentState") -> "AgentState":
    """Entry node: log the incoming question and forward the state untouched."""
    history = state["messages"]
    latest = history[-1].content if history else ""
    print(f"Processing question: {latest}")
    # No transformation here — downstream nodes receive the same history.
    return {"messages": history}
def restart_required(state: "AgentState") -> bool:
    """
    Conditional-edge predicate: should the "thought_process" node run after
    "answer_message"?

    Bug fix: this function is registered via ``add_conditional_edges`` with
    the path map ``{True: "thought_process", False: END}``, so it must return
    a bool.  It previously returned the state dict, which never matched
    either path-map key and broke routing.

    Returns:
        True when the most recent message has no usable text (the model
        produced an empty answer) — route to the thought-process node.
        False otherwise — route to END.
    """
    messages = state["messages"]
    last_content = messages[-1].content if messages else ""
    # Heuristic: a blank final message means no answer was produced.
    return not str(last_content).strip()
# def tool_message(state: AgentState) -> AgentState:
# messages = state["messages"]
# prompt = f"""
# You are a GAIA question answering expert.
# Your task is to decide whether to use a tool or not.
# If you need to use a tool, answer ONLY:
# CALL_TOOL: <your tool name>
# If you do not need to use a tool, answer ONLY:
# NO_TOOL
# Here is the question:
# {messages}
# """
# return {"messages": messages}
# response = model_with_tools.invoke(prompt)
# return {"messages": messages + [response]}
# Augment the LLM with tools it may call while answering.
tools = [web_search]
# Lookup table from tool name -> tool object, used when executing tool calls.
# Use `t` as the loop variable so we do not shadow the imported `tool`
# decorator from langchain_core.tools.
tools_by_name = {t.name: t for t in tools}
model_with_tools = model.bind_tools(tools)
def _run_tool_loop(messages: list, final_instruction: str) -> list:
    """
    Shared model/tool loop for ``answer_message`` and ``thought_process``.

    First pass lets the tool-bound model decide whether to call a tool.  If
    no tool is requested, the model's reply is final.  Otherwise every
    requested tool is executed, its output is appended, and a second
    (tool-free) pass produces the final plain-text response.

    Args:
        messages: prompt list (system message + conversation so far);
            extended in place with all new messages.
        final_instruction: text appended after the tool results to force a
            plain-text final reply.

    Returns:
        The extended message list, ending with the final AIMessage.
    """
    ai_msg = model_with_tools.invoke(messages)
    messages.append(ai_msg)
    tool_calls = getattr(ai_msg, "tool_calls", None) or []
    if not tool_calls:
        # Model answered directly; its content is already the final answer.
        print(f"Final response: {ai_msg}")
        return messages
    for tool_call in tool_calls:
        name = tool_call["name"]
        requested_tool = tools_by_name[name]
        tool_result = requested_tool.invoke(tool_call["args"])  # plain string from web_search
        # Bug fix: a tool result must be a ToolMessage carrying the
        # originating tool_call id.  OpenAI-compatible APIs (Groq) reject a
        # HumanMessage placed directly after an assistant message that
        # contains tool_calls.
        messages.append(
            ToolMessage(
                content=f"Tool result ({name}):\n{tool_result}",
                tool_call_id=tool_call["id"],
            )
        )
    # Second pass: force a plain-text final answer (no tool calls expected),
    # so invoke the plain `model`, not the tool-bound one.
    messages.append(HumanMessage(content=final_instruction))
    final_response = model.invoke(messages)
    print(f"Final response: {final_response}")
    # Include the final AIMessage so the caller (e.g. BasicAgent) reads .content.
    messages.append(final_response)
    return messages


def answer_message(state: "AgentState") -> "AgentState":
    """Graph node: produce the final GAIA answer, using web_search if needed."""
    prompt = [SystemMessage("""
    You are a GAIA question answering expert.
    Your task is to provide an answer to a question.
    Think carefully before answering the question.
    Do not include any thought process before answering the question, and only response exactly what was being asked of you.
    If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
    Example question: How many hours are there in a day?
    Response: 24
    """)]
    messages = _run_tool_loop(
        prompt + state["messages"],
        "Using the tool results above, provide the FINAL numeric/text answer now. "
        "Do not call any tools. Respond with only the answer.",
    )
    return {"messages": messages}


def thought_process(state: "AgentState") -> "AgentState":
    """Graph node: produce a step-by-step chain of thought for the question."""
    prompt = [SystemMessage("""
    You are a GAIA question answering assistant.
    Your task is to list down the steps and chain of thoughts to answer the question provided carefully.
    Think carefully before answering the question.
    The steps provided should simplify the thinking of the another person and help them to derive the answer.
    Include steps that require the use of specific tools or state the limitation that you're facing.
    """)]
    messages = _run_tool_loop(
        prompt + state["messages"],
        "Using the tool results above, provide the list of steps answer now. "
        "Do not call any tools. Respond with only the chain of thought.",
    )
    # Bug fix: the state schema (AgentState) only declares "messages";
    # returning {"thoughts": ...} is rejected by LangGraph as an invalid
    # state update, so the result is stored under "messages".
    return {"messages": messages}
def build_graph():
    """
    Assemble and compile the agent's LangGraph.

    Flow::

        START -> read_message -> answer_message
        answer_message --restart_required? True--> thought_process -> END
        answer_message --restart_required? False--> END

    Returns:
        The compiled, executable graph for use in app.py.
    """
    agent_graph = StateGraph(AgentState)
    # Add nodes
    agent_graph.add_node("read_message", read_message)
    agent_graph.add_node("answer_message", answer_message)
    agent_graph.add_node("thought_process", thought_process)
    # Add edges
    agent_graph.add_edge(START, "read_message")
    agent_graph.add_edge("read_message", "answer_message")
    # Bug fix: the old unconditional answer_message -> END edge duplicated
    # the conditional routing below (END fired on every run); the conditional
    # edges alone cover both outcomes.
    agent_graph.add_conditional_edges(
        "answer_message",
        restart_required,
        {True: "thought_process", False: END},
    )
    # Bug fix: thought_process previously had no outgoing edge (dead end).
    agent_graph.add_edge("thought_process", END)
    # Compile and return the executable graph for use in app.py
    return agent_graph.compile()