Final_Assignment_GAIA

Paused

App Files Files Community

Final_Assignment_GAIA / agent.py

superone001

Update agent.py

9f3fd39 verified 11 months ago

raw

history blame

7.59 kB

	from dotenv import load_dotenv

	from langchain_openai import ChatOpenAI
	from langchain_core.tools import tool
	from langchain_community.document_loaders import WikipediaLoader
	from langchain_community.document_loaders import ArxivLoader
	from langchain_community.tools.tavily_search import TavilySearchResults
	from langchain_tavily import TavilyExtract
	from youtube_transcript_api import YouTubeTranscriptApi

	from langchain_core.messages import SystemMessage, HumanMessage
	from langgraph.graph import START, StateGraph, MessagesState
	from langgraph.prebuilt import ToolNode
	from langgraph.prebuilt import tools_condition
	import base64
	import httpx


	load_dotenv()

	@tool
	def add(a: int, b: int) -> int:
	    """
	    Add b to a.

	    Args:
	    a: first int number
	    b: second int number
	    """
	    # The docstring above is part of the function object passed to the
	    # `tool` decorator, so its wording is intentionally left as-is.
	    total = a + b
	    return total

	@tool
	def substract(a: int, b: int) -> int:
	    """
	    Subtract b from a.

	    Args:
	    a: first int number
	    b: second int number
	    """
	    # Order matters: the second operand is taken away from the first.
	    difference = a - b
	    return difference

	@tool
	def multiply(a: int, b: int) -> int:
	    """
	    Multiply a by b.

	    Args:
	    a: first int number
	    b: second int number
	    """
	    # Plain product of the two operands.
	    product = a * b
	    return product

	@tool
	def divide(a: int, b: int) -> float:
	    """
	    Divide a by b.

	    Args:
	    a: first int number
	    b: second int number
	    """
	    # True division (`/`) always produces a float (e.g. 4 / 2 == 2.0), so the
	    # return annotation is `float`, not `int`.
	    if b == 0:
	        # Fail with a descriptive message instead of a bare ZeroDivisionError.
	        raise ValueError("Can't divide by zero.")
	    return a / b

	@tool
	def mod(a: int, b: int) -> int:
	    """
	    Remainder of a divided by b.

	    Args:
	    a: first int number
	    b: second int number
	    """
	    # Mirror the zero-divisor handling of `divide` so both tools report the
	    # same descriptive error instead of a bare ZeroDivisionError.
	    if b == 0:
	        raise ValueError("Can't divide by zero.")
	    return a % b

	@tool
	def wiki_search(query: str) -> dict:
	    """
	    Search Wikipedia.

	    Args:
	    query: what to search for
	    """
	    # Returns a dict with a single "wiki_results" key (hence the `dict`
	    # annotation); the value concatenates up to 3 documents, each truncated to
	    # its first 1000 characters and wrapped in <START source="...">...<END>.
	    search_docs = WikipediaLoader(query=query, load_max_docs=3).load()
	    formatted_search_docs = "".join(
	        # `.get` keeps one document without a "source" entry from failing the
	        # whole search.
	        f'<START source="{doc.metadata.get("source", "")}">{doc.page_content[:1000]}<END>'
	        for doc in search_docs
	    )
	    return {"wiki_results": formatted_search_docs}

	@tool
	def arvix_search(query: str) -> dict:
	    """
	    Search arXiv which is online archive of preprint and postprint manuscripts
	    for different fields of science.

	    Args:
	    query: what to search for
	    """
	    # Returns a dict with a single "arvix_results" key (hence the `dict`
	    # annotation); the value concatenates up to 3 documents, each truncated to
	    # its first 1000 characters and wrapped in <START source="...">...<END>.
	    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

	    def _source_of(doc) -> str:
	        # NOTE(review): ArxivLoader metadata appears to expose keys such as
	        # "Title"/"Published" rather than "source" — confirm. Looking the key up
	        # unconditionally raised KeyError, so fall back through likely keys.
	        metadata = doc.metadata
	        return (
	            metadata.get("source")
	            or metadata.get("entry_id")
	            or metadata.get("Title", "")
	        )

	    formatted_search_docs = "".join(
	        f'<START source="{_source_of(doc)}">{doc.page_content[:1000]}<END>'
	        for doc in search_docs
	    )
	    return {"arvix_results": formatted_search_docs}

	@tool
	def web_search(query: str) -> dict:
	    """
	    Search WEB.

	    Args:
	    query: what to search for
	    """
	    # Returns a dict with a single "web_results" key (hence the `dict`
	    # annotation); the value concatenates up to 3 hits, each truncated to its
	    # first 1000 characters and wrapped in <START source="...">...<END>.
	    search_docs = TavilySearchResults(max_results=3, include_answer=True).invoke({"query": query})

	    # NOTE(review): on a failed request the Tavily tool appears to hand back an
	    # error string instead of a list of hits — confirm. Indexing into a string
	    # with doc["url"] would raise TypeError, so pass the text through instead.
	    if isinstance(search_docs, str):
	        return {"web_results": search_docs}

	    formatted_search_docs = "".join(
	        f'<START source="{doc["url"]}">{doc["content"][:1000]}<END>'
	        for doc in search_docs
	    )
	    return {"web_results": formatted_search_docs}

	@tool
	def open_web_page(url: str) -> dict:
	    """
	    Open web page and get its content.

	    Args:
	    url: web page url in ""
	    """
	    # Returns a dict with a single "web_page_content" key (hence the `dict`
	    # annotation) holding the first 1000 characters of the extracted page.
	    search_docs = TavilyExtract().invoke({"urls": [url]})

	    # Guard against an empty/unsuccessful extraction: indexing results[0]
	    # unconditionally raised IndexError/TypeError when nothing came back.
	    results = search_docs.get("results") if isinstance(search_docs, dict) else None
	    if not results:
	        return {"web_page_content": f"No content could be extracted from {url}"}

	    page = results[0]
	    # `raw_content` may be missing or None; treat that as an empty page.
	    raw_content = page.get("raw_content") or ""
	    formatted_search_docs = f'<START source="{page.get("url", url)}">{raw_content[:1000]}<END>'
	    return {"web_page_content": formatted_search_docs}

	@tool
	def youtube_transcript(url: str) -> dict:
	    """
	    Get transcript of YouTube video.
	    Args:
	    url: YouTube video url in ""
	    """
	    # Local import keeps this block self-contained (standard library only).
	    from urllib.parse import parse_qs, urlparse

	    # Tolerate surrounding whitespace or quotes around the URL.
	    cleaned_url = url.strip().strip("\"'")
	    parsed = urlparse(cleaned_url)
	    path_parts = [part for part in parsed.path.split("/") if part]

	    # 1) Standard watch URLs: take only the `v` query parameter, so extra
	    #    parameters such as `&t=10s` or `&list=...` no longer leak into the id.
	    video_id = parse_qs(parsed.query).get("v", [""])[0]

	    # 2) Short links: https://youtu.be/<id>
	    if not video_id and (parsed.hostname or "").lower().endswith("youtu.be") and path_parts:
	        video_id = path_parts[0]

	    # 3) Path-based forms: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
	    if not video_id and len(path_parts) >= 2 and path_parts[0] in {"embed", "shorts", "live", "v"}:
	        video_id = path_parts[1]

	    # 4) Last resort: the original prefix-based extraction.
	    if not video_id:
	        video_id = cleaned_url.partition("https://www.youtube.com/watch?v=")[2]

	    if not video_id:
	        raise ValueError(f"Could not extract a YouTube video id from: {url}")

	    transcript = YouTubeTranscriptApi.get_transcript(video_id)
	    # Join the text of every transcript item into one space-separated string.
	    transcript_text = " ".join(item["text"] for item in transcript)
	    return {"youtube_transcript": transcript_text}


	# Every tool the agent may call, in registration order.
	tools = [
	    # Arithmetic helpers
	    add, substract, multiply, divide, mod,
	    # Knowledge / web lookups
	    wiki_search, arvix_search, web_search, open_web_page,
	    # Media
	    youtube_transcript,
	]

	# System prompt: instructs the model to reason step by step and to end with a
	# single "FINAL ANSWER: ..." line in a strict format. The text contains no
	# placeholders, so a plain (non-f) string literal yields the same value.
	system_prompt = """
	You are a general AI assistant. I will ask you a question.
	First, provide a step-by-step explanation of your reasoning to arrive at the answer.
	Then, respond with your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
	[YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
	If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
	If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
	If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
	"""
	# Wrapped once here so it can be prepended to every conversation.
	system_message = SystemMessage(content=system_prompt)

	# Build graph
	def build_graph():
	    """Build the LangGraph graph of the agent.

	    The graph has two nodes: "assistant" (calls the language model with the
	    system message prepended to the conversation) and "tools" (executes the
	    tools listed in `tools`). Execution starts at "assistant"; `tools_condition`
	    decides the next step after each assistant turn, and every tool result is
	    fed back to "assistant".

	    Returns:
	        The compiled graph, ready to be invoked with {"messages": ...}.
	    """
	    # Imported locally: none of the module-level imports bring this name into
	    # scope, so referencing it raised NameError as soon as the graph was built.
	    from langchain_community.llms import HuggingFaceEndpoint

	    # Language model and tools
	    llm = HuggingFaceEndpoint(
	        endpoint_url="https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
	        max_new_tokens=500,  # specified directly
	        temperature=0.1,  # specified directly
	        repetition_penalty=1.2,  # specified directly
	        top_p=0.9,  # optional parameter
	        # other parameters can also be specified directly here
	    )
	    # NOTE(review): `bind_tools` is normally a chat-model API; confirm this
	    # endpoint class exposes it, otherwise wrap `llm` in a chat model first.
	    llm_with_tools = llm.bind_tools(tools, strict=True)

	    # Nodes
	    def assistant(state: MessagesState):
	        """Assistant node: run the model on the system message plus the history."""
	        return {"messages": [llm_with_tools.invoke([system_message] + state["messages"])]}

	    # Graph
	    builder = StateGraph(MessagesState)
	    builder.add_node("assistant", assistant)
	    builder.add_node("tools", ToolNode(tools))
	    builder.add_edge(START, "assistant")
	    builder.add_conditional_edges("assistant", tools_condition)
	    builder.add_edge("tools", "assistant")

	    # Compile graph
	    return builder.compile()


	# Testing and solving particular tasks
	if __name__ == "__main__":

	    agent = build_graph()

	    question = """
	Review the chess position provided in the image. It is black's turn.
	Provide the correct next move for black which guarantees a win.
	Please provide your response in algebraic notation.
	"""
	    content_urls = {
	        "image": "https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44",
	        "audio": None,
	    }

	    # Define user message and add all the content
	    content = [
	        {
	            "type": "text",
	            "text": question,
	        }
	    ]
	    # Attach each available media file as a base64 content block.
	    # NOTE(review): the MIME types are hard-coded; confirm they match the
	    # files actually served by the URLs above.
	    for media_type, mime_type in (("image", "image/jpeg"), ("audio", "audio/wav")):
	        media_url = content_urls[media_type]
	        if not media_url:
	            continue
	        response = httpx.get(media_url)
	        # Fail loudly on HTTP errors rather than base64-encoding an error page.
	        response.raise_for_status()
	        content.append(
	            {
	                "type": media_type,
	                "source_type": "base64",
	                "data": base64.b64encode(response.content).decode("utf-8"),
	                "mime_type": mime_type,
	            }
	        )
	    messages = {
	        "role": "user",
	        "content": content,
	    }

	    # Run agent on the question
	    messages = agent.invoke({"messages": messages})
	    for message in messages["messages"]:
	        message.pretty_print()

	    answer = messages["messages"][-1].content
	    marker = "FINAL ANSWER: "
	    index = answer.find(marker)

	    print("\n")
	    print("=" * 30)
	    if index == -1:
	        # No marker found: show the whole reply exactly once.
	        print(answer)
	    else:
	        # Show only the text that follows the marker.
	        print(answer[index + len(marker):])
	    print("=" * 30)