Spaces:
Runtime error
Runtime error
| # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
| from typing import Any, Optional | |
| from camel.agents.tool_agents.base import BaseToolAgent | |
| # flake8: noqa :E501 | |
class HuggingFaceToolAgent(BaseToolAgent):
    r"""Tool agent for calling HuggingFace models. This agent is a wrapper
    around agents from the `transformers` library. For more information
    about the available models, please see the `transformers` documentation
    at https://huggingface.co/docs/transformers/transformers_agents.

    Args:
        name (str): The name of the agent.
        *args (Any): Additional positional arguments to pass to the underlying
            Agent class.
        remote (bool, optional): Flag indicating whether to run the agent
            remotely. (default: :obj:`True`)
        **kwargs (Any): Additional keyword arguments to pass to the underlying
            Agent class.

    Raises:
        ValueError: If the `transformers` tool agents cannot be imported, or
            if the installed `transformers` version is older than 4.31.0.
    """

    def __init__(
        self,
        name: str,
        *args: Any,
        remote: bool = True,
        **kwargs: Any,
    ) -> None:
        # The heavy optional dependencies are imported lazily so that merely
        # importing this module does not require `transformers`.
        try:
            # TODO: Support other tool agents
            import transformers
            from packaging import version

            if version.parse(transformers.__version__) < version.parse(
                "4.31.0"
            ):
                raise ValueError(
                    "The version of \"transformers\" package should >= 4.31.0"
                )

            from transformers.tools import OpenAiAgent
            from transformers.tools.agent_types import AgentImage
        except (ImportError, ValueError) as err:
            # Chain the original error so the real cause (missing package
            # vs. outdated version) stays visible in the traceback.
            raise ValueError(
                "Could not import transformers tool agents. "
                "Please setup the environment with "
                "pip install huggingface_hub==0.14.1 transformers==4.31.0 diffusers accelerate==0.20.3 datasets torch soundfile sentencepiece opencv-python"
            ) from err

        # Kept on the instance so `step`/`chat` can recognise image outputs
        # without importing `transformers` again.
        self.agent_image_type = AgentImage
        self.agent = OpenAiAgent(*args, **kwargs)
        description = f"""The `{name}` is a tool agent that can perform a variety of tasks including:
- Document question answering: given a document (such as a PDF) in image format, answer a question on this document
- Text question answering: given a long text and a question, answer the question in the text
- Unconditional image captioning: Caption the image!
- Image question answering: given an image, answer a question on this image
- Image segmentation: given an image and a prompt, output the segmentation mask of that prompt
- Speech to text: given an audio recording of a person talking, transcribe the speech into text
- Text to speech: convert text to speech
- Zero-shot text classification: given a text and a list of labels, identify to which label the text corresponds the most
- Text summarization: summarize a long text in one or a few sentences
- Translation: translate the text into a given language
- Text downloading: to download a text from a web URL
- Text to image: generate an image according to a prompt, leveraging stable diffusion
- Image transformation: modify an image given an initial image and a prompt, leveraging instruct pix2pix stable diffusion
- Text to video: generate a small video according to a prompt
Here are some python code examples of what you can do with this agent:
Single execution (step) mode, the single execution method is when using the step() method of the agent:
```
# Text to image
rivers_and_lakes_image = {name}.step("Draw me a picture of rivers and lakes.")
rivers_and_lakes_image.save("./rivers_and_lakes_image.png")
# Text to image -> Image transformation
sea_add_island_image = {name}.step("Draw me a picture of the sea then transform the picture to add an island")
sea_add_island_image.save("./sea_add_island_image.png")
# If you'd like to keep a state across executions or to pass non-text objects to the agent,
# you can do so by specifying variables that you would like the agent to use. For example,
# you could generate the first image of rivers and lakes, and ask the model to update that picture to add an island by doing the following:
picture = {name}.step("Generate a picture of rivers and lakes.")
picture.save("./picture.png")
updated_picture = {name}.step("Transform the image in `picture` to add an island to it.", picture=picture)
updated_picture.save("./updated_picture.png")
capybara_sea_image = {name}.step("Draw me a picture of the `prompt`", prompt="a capybara swimming in the sea")
capybara_sea_image.save("./capybara_sea_image.png")
# Document question answering
answer = {name}.step(
"In the following `document`, where will the TRRF Scientific Advisory Council Meeting take place?",
document=document,
)
print(answer)
# Text to image
boat_image = {name}.step("Generate an image of a boat in the water")
boat_image.save("./boat_image.png")
# Unconditional image captioning
boat_image_caption = {name}.step("Can you caption the `boat_image`?", boat_image=boat_image)
print(boat_image_caption)
# Text to image -> Unconditional image captioning -> Text to speech
boat_audio = {name}.step("Can you generate an image of a boat? Please read out loud the contents of the image afterwards")
# Text downloading
document = {name}.step("Download the text from http://hf.co")
print(document)
# Text summarization
summary = {name}.step("Summarize the following text: `document`", document=document)
print(summary)
# Text downloading -> Text summarization -> Text to speech
audio = {name}.step("Read out loud the summary of http://hf.co")
```
Chat-based execution (chat), the agent also has a chat-based approach, using the chat() method:
```
# Clean the chat history
{name}.reset()
# Text to image
capybara_image = {name}.chat("Show me an an image of a capybara")
capybara_image.save("./capybara_image.png")
# Image transformation
transformed_capybara_image = {name}.chat("Transform the image so that it snows")
transformed_capybara_image.save("./transformed_capybara_image.png")
# Image segmentation
segmented_transformed_capybara_image = {name}.chat("Show me a mask of the snowy capybaras")
segmented_transformed_capybara_image.save("./segmented_transformed_capybara_image.png")
```
"""
        super().__init__(name, description)
        self.remote = remote

    def reset(self) -> None:
        r"""Resets the chat history of the agent.

        Delegates to ``prepare_for_new_chat()`` on the wrapped agent.
        """
        self.agent.prepare_for_new_chat()

    def _resolve_remote(self, remote: Optional[bool]) -> bool:
        r"""Returns the per-call `remote` flag, falling back to the
        instance-level default when the caller passed :obj:`None`.
        """
        return self.remote if remote is None else remote

    def _unwrap_output(self, agent_output: Any) -> Any:
        r"""Converts outputs of the agent image type via ``to_raw()``; any
        other output is returned unchanged.
        """
        if isinstance(agent_output, self.agent_image_type):
            return agent_output.to_raw()
        return agent_output

    def step(
        self,
        *args: Any,
        remote: Optional[bool] = None,
        **kwargs: Any,
    ) -> Any:
        r"""Runs the agent in single execution mode.

        Args:
            *args (Any): Positional arguments to pass to the agent.
            remote (bool, optional): Flag indicating whether to run the agent
                remotely. Overrides the default setting. (default: :obj:`None`)
            **kwargs (Any): Keyword arguments to pass to the agent.

        Returns:
            Any: The response from the agent. Outputs that are instances of
                the agent image type are converted with ``to_raw()`` first.
        """
        agent_output = self.agent.run(
            *args, remote=self._resolve_remote(remote), **kwargs
        )
        return self._unwrap_output(agent_output)

    def chat(
        self,
        *args: Any,
        remote: Optional[bool] = None,
        **kwargs: Any,
    ) -> Any:
        r"""Runs the agent in a chat conversation mode.

        Args:
            *args (Any): Positional arguments to pass to the agent.
            remote (bool, optional): Flag indicating whether to run the agent
                remotely. Overrides the default setting. (default: :obj:`None`)
            **kwargs (Any): Keyword arguments to pass to the agent.

        Returns:
            Any: The response from the agent. Outputs that are instances of
                the agent image type are converted with ``to_raw()`` first.
        """
        agent_output = self.agent.chat(
            *args, remote=self._resolve_remote(remote), **kwargs
        )
        return self._unwrap_output(agent_output)