AutoPage

Runtime error

App Files Files Community

AutoPage / camel /agents /tool_agents /hugging_face_tool_agent.py

Mqleet

upd code

fcaa164 about 2 months ago

raw

history blame contribute delete

8.72 kB

	# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
	from typing import Any, Optional

	from camel.agents.tool_agents.base import BaseToolAgent


	# flake8: noqa :E501
	class HuggingFaceToolAgent(BaseToolAgent):
	    r"""Tool agent for calling HuggingFace models. This agent is a wrapper
	    around agents from the `transformers` library. For more information
	    about the available models, please see the `transformers` documentation
	    at https://huggingface.co/docs/transformers/transformers_agents.

	    Args:
	        name (str): The name of the agent.
	        *args (Any): Additional positional arguments to pass to the
	            underlying Agent class.
	        remote (bool, optional): Flag indicating whether to run the agent
	            remotely. (default: :obj:`True`)
	        **kwargs (Any): Additional keyword arguments to pass to the
	            underlying Agent class.

	    Raises:
	        ValueError: If `transformers` (>= 4.31.0) or its tool agents cannot
	            be imported.
	    """

	    def __init__(
	        self,
	        name: str,
	        *args: Any,
	        remote: bool = True,
	        **kwargs: Any,
	    ) -> None:
	        try:
	            # TODO: Support other tool agents
	            import transformers
	            from packaging import version

	            if version.parse(transformers.__version__) < version.parse(
	                "4.31.0"
	            ):
	                raise ValueError(
	                    "The version of \"transformers\" package should >= 4.31.0"
	                )

	            from transformers.tools import OpenAiAgent
	            from transformers.tools.agent_types import AgentImage
	        except (ImportError, ValueError) as err:
	            # Chain the underlying error so the traceback still shows
	            # whether a package was missing or the version was too old.
	            raise ValueError(
	                "Could not import transformers tool agents. "
	                "Please setup the environment with "
	                "pip install huggingface_hub==0.14.1 transformers==4.31.0 diffusers accelerate==0.20.3 datasets torch soundfile sentencepiece opencv-python"
	            ) from err
	        # Kept on the instance so `step`/`chat` can detect image outputs.
	        self.agent_image_type = AgentImage
	        # Unpack so each positional/keyword argument reaches the underlying
	        # Agent as its own argument instead of one tuple plus dict keys.
	        self.agent = OpenAiAgent(*args, **kwargs)
	        description = f"""The `{name}` is a tool agent that can perform a variety of tasks including:
	- Document question answering: given a document (such as a PDF) in image format, answer a question on this document
	- Text question answering: given a long text and a question, answer the question in the text
	- Unconditional image captioning: Caption the image!
	- Image question answering: given an image, answer a question on this image
	- Image segmentation: given an image and a prompt, output the segmentation mask of that prompt
	- Speech to text: given an audio recording of a person talking, transcribe the speech into text
	- Text to speech: convert text to speech
	- Zero-shot text classification: given a text and a list of labels, identify to which label the text corresponds the most
	- Text summarization: summarize a long text in one or a few sentences
	- Translation: translate the text into a given language
	- Text downloading: to download a text from a web URL
	- Text to image: generate an image according to a prompt, leveraging stable diffusion
	- Image transformation: modify an image given an initial image and a prompt, leveraging instruct pix2pix stable diffusion
	- Text to video: generate a small video according to a prompt

	Here are some python code examples of what you can do with this agent:

	Single execution (step) mode, the single execution method is when using the step() method of the agent:
	```
	# Text to image
	rivers_and_lakes_image = {name}.step("Draw me a picture of rivers and lakes.")
	rivers_and_lakes_image.save("./rivers_and_lakes_image.png")

	# Text to image -> Image transformation
	sea_add_island_image = {name}.step("Draw me a picture of the sea then transform the picture to add an island")
	sea_add_island_image.save("./sea_add_island_image.png")

	# If you'd like to keep a state across executions or to pass non-text objects to the agent,
	# you can do so by specifying variables that you would like the agent to use. For example,
	# you could generate the first image of rivers and lakes, and ask the model to update that picture to add an island by doing the following:
	picture = {name}.step("Generate a picture of rivers and lakes.")
	picture.save("./picture.png")
	updated_picture = {name}.step("Transform the image in `picture` to add an island to it.", picture=picture)
	updated_picture.save("./updated_picture.png")

	capybara_sea_image = {name}.step("Draw me a picture of the `prompt`", prompt="a capybara swimming in the sea")
	capybara_sea_image.save("./capybara_sea_image.png")

	# Document question answering
	answer = {name}.step(
	"In the following `document`, where will the TRRF Scientific Advisory Council Meeting take place?",
	document=document,
	)
	print(answer)


	# Text to image
	boat_image = {name}.step("Generate an image of a boat in the water")
	boat_image.save("./boat_image.png")

	# Unconditional image captioning
	boat_image_caption = {name}.step("Can you caption the `boat_image`?", boat_image=boat_image)
	print(boat_image_caption)

	# Text to image -> Unconditional image captioning -> Text to speech
	boat_audio = {name}.step("Can you generate an image of a boat? Please read out loud the contents of the image afterwards")

	# Text downloading
	document = {name}.step("Download the text from http://hf.co")
	print(document)

	# Text summarization
	summary = {name}.step("Summarize the following text: `document`", document=document)
	print(summary)

	# Text downloading -> Text summarization -> Text to speech
	audio = {name}.step("Read out loud the summary of http://hf.co")
	```

	Chat-based execution (chat), the agent also has a chat-based approach, using the chat() method:
	```
	# Clean the chat history
	{name}.reset()

	# Text to image
	capybara_image = {name}.chat("Show me an an image of a capybara")
	capybara_image.save("./capybara_image.png")

	# Image transformation
	transformed_capybara_image = {name}.chat("Transform the image so that it snows")
	transformed_capybara_image.save("./transformed_capybara_image.png")

	# Image segmentation
	segmented_transformed_capybara_image = {name}.chat("Show me a mask of the snowy capybaras")
	segmented_transformed_capybara_image.save("./segmented_transformed_capybara_image.png")
	```
	"""
	        super().__init__(name, description)
	        # Default for `step`/`chat` when the caller does not pass `remote`.
	        self.remote = remote

	    def _unwrap_output(self, agent_output: Any) -> Any:
	        r"""Returns `agent_output.to_raw()` when the output is an instance
	        of the agent image type, otherwise returns the output unchanged.

	        Args:
	            agent_output (Any): The value returned by the underlying agent.

	        Returns:
	            Any: The raw form of an image output, or the original output.
	        """
	        if isinstance(agent_output, self.agent_image_type):
	            return agent_output.to_raw()
	        return agent_output

	    def reset(self) -> None:
	        r"""Resets the chat history of the agent."""
	        self.agent.prepare_for_new_chat()

	    def step(
	        self,
	        *args: Any,
	        remote: Optional[bool] = None,
	        **kwargs: Any,
	    ) -> Any:
	        r"""Runs the agent in single execution mode.

	        Args:
	            *args (Any): Positional arguments to pass to the agent.
	            remote (bool, optional): Flag indicating whether to run the agent
	                remotely. Overrides the default setting. (default: :obj:`None`)
	            **kwargs (Any): Keyword arguments to pass to the agent.

	        Returns:
	            Any: The response from the agent. Outputs of the agent image
	                type are converted with `to_raw()` before being returned.
	        """
	        if remote is None:
	            # Fall back to the setting chosen at construction time.
	            remote = self.remote
	        agent_output = self.agent.run(*args, remote=remote, **kwargs)
	        return self._unwrap_output(agent_output)

	    def chat(
	        self,
	        *args: Any,
	        remote: Optional[bool] = None,
	        **kwargs: Any,
	    ) -> Any:
	        r"""Runs the agent in a chat conversation mode.

	        Args:
	            *args (Any): Positional arguments to pass to the agent.
	            remote (bool, optional): Flag indicating whether to run the agent
	                remotely. Overrides the default setting. (default: :obj:`None`)
	            **kwargs (Any): Keyword arguments to pass to the agent.

	        Returns:
	            Any: The response from the agent. Outputs of the agent image
	                type are converted with `to_raw()` before being returned.
	        """
	        if remote is None:
	            # Fall back to the setting chosen at construction time.
	            remote = self.remote
	        agent_output = self.agent.chat(*args, remote=remote, **kwargs)
	        return self._unwrap_output(agent_output)