First_agent_template

Sleeping

App Files Files Community

PolPC13 committed on Nov 20, 2025

Commit

6025aa5

1 Parent(s): 4d31f6b

Modified prompts.yaml.

Browse files

Files changed (3) hide show

app.py +5 -2
prompts.yaml +4 -0
tools/new_tools.py +107 -173

app.py CHANGED Viewed

@@ -8,6 +8,8 @@ from tools.exchange_rates import ExchangeRatesTool
 from tools.web_search import ddgs
 from tools.visit_webpage import VisitWebpageTool
 from Gradio_UI import GradioUI
 final_answer = FinalAnswerTool()
@@ -30,7 +32,7 @@ with open("prompts.yaml", 'r') as stream:
 agent = CodeAgent(
     model=model,
-    tools=[final_answer, ddgs(), VisitWebpageTool(), ExchangeRatesTool()], ## add your tools here (don't remove final answer)
     max_steps=6,
     verbosity_level=1,
     grammar=None,
@@ -41,4 +43,5 @@ agent = CodeAgent(
 )
-GradioUI(agent).launch()

 from tools.web_search import ddgs
 from tools.visit_webpage import VisitWebpageTool
 from Gradio_UI import GradioUI
+from new_tools import QueryImageTool, ReverseStringTool, WikiSearchTool
+from tools.get_current_time import GetCurrentTimeTool
 final_answer = FinalAnswerTool()
 agent = CodeAgent(
     model=model,
+    tools=[final_answer, ddgs(), VisitWebpageTool(), ExchangeRatesTool(), WikiSearchTool, QueryImageTool(), ReverseStringTool(), GetCurrentTimeTool(), image_generation_tool], ## add your tools here (don't remove final answer)
     max_steps=6,
     verbosity_level=1,
     grammar=None,
 )
+if __name__ == "__main__":
+    GradioUI(agent).launch()

prompts.yaml CHANGED Viewed

@@ -8,6 +8,10 @@
   During each intermediate step, you can use 'print()' to save whatever important information you will then need.
   These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
   In the end you have to return a final answer using the `final_answer` tool.
   Here are a few examples using notional tools:
   ---

   During each intermediate step, you can use 'print()' to save whatever important information you will then need.
   These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
   In the end you have to return a final answer using the `final_answer` tool.
+  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
   Here are a few examples using notional tools:
   ---

tools/new_tools.py CHANGED Viewed

@@ -1,187 +1,121 @@
-from langchain_core.messages import HumanMessage
-from langchain_core.tools import tool, Tool
-from langchain_together import ChatTogether
-from langgraph.prebuilt import create_react_agent
-from langchain_community.retrievers import WikipediaRetriever
-from langchain_community.tools import BraveSearch
-from langchain_experimental.utilities import PythonREPL
-from langchain_community.agent_toolkits.load_tools import load_tools
-import requests
-from langgraph_supervisor import create_supervisor
-from youtube_transcript_api import YouTubeTranscriptApi
-from pytubefix import extract, YouTube
-import whisper
 from qwen_vl_utils import process_vision_info
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
-@tool
-def wiki_search(query: str) -> str:
-    """Search Wikipedia for query and return maximum 3 results
-    Args:
-        query (str): query to search on Wikipedia
-    Returns:
-        wiki_result (str): result of search
-    """
-    try:
-        retriever = WikipediaRetriever()
-        wiki_result = retriever.invoke(query)
-        return wiki_result
-    except Exception as e:
-        return f"wiki_search failed {e}"
-@tool
-def query_image(query: str, image_url: str):
-    """Analyze the query on an image using a VLM
-    Args:
-        query (str): query about the image
-        image_url (str): link to the image
-    Returns:
-        response (str): response to the query on image
-    """
-    try:
-        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-            "Qwen/Qwen2.5-VL-3B-Instruct", torch_dtype="auto", device_map="auto"
-        )
-        processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "image",
-                        "image": image_url,
-                        "max_pixels": 360 * 420,
-                    },
-                    {"type": "text", "text": query},
-                ],
-            }
-        ]
-        text = processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
-        image_inputs, video_inputs = process_vision_info(messages)
-        inputs = processor(
-            text=[text],
-            images=image_inputs,
-            videos=video_inputs,
-            padding=True,
-            return_tensors="pt",
-        )
-        inputs = inputs.to("cuda")
-        # Inference
-        generated_ids = model.generate(**inputs, max_new_tokens=128)
-        generated_ids_trimmed = [
-            out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-        ]
-        output_text = processor.batch_decode(
-            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
-        print(output_text)
-        return output_text
-    except Exception as e:
-        return f"query_image failed {e}"
-@tool
-def reverse_string(input_string: str) -> str:
-    """Reverse the character order of input string.
-    Args:
-        input_string (str): string to reverse
-    Returns:
-        reversed_string (str): reversed string
-    """
-    try:
-        reversed_string = input_string[::-1]
-        reversed_string = f"The reversed string returned from reverse_string function is: {reversed_string}"
-        return reversed_string
-    except Exception as e:
-        return f"reverse_string failed {e}"
-repl = PythonREPL()
-python_repl_tool = Tool(
-    name="python_repl",
-    description="""A Python shell. Use this to execute python commands.
-        Input should be a valid python command.
-        Input should be a valid Python expression or script.
-        If you want to see the output of a value, you should print it out with `print(...)`.
-        Always return the printed code output.
-        Example: print(2 + 2) → will return 4
-        Do NOT execute code that could be harmful to the host system.
-        You are allowed to download files from URLs.""",
-    func=repl.run
-)
-class langgraph_agent:
-    def __init__(self):
-        llm = ChatTogether(
-            model="Qwen/Qwen3-235B-A22B-fp8-tput",
-            temperature=0
-        )
-        helper_llm = ChatTogether(
-            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
-            temperature=0
-        )
-        research_agent = create_react_agent(
-            llm,
-            tools=research_tools,
-            name="research_agent",
-            prompt=(
-                    "You are a research agent. You have access to web_search tool to search the web, wiki_search tool to search wikipedia\n\n"
-                    "INSTRUCTIONS:\n"
-                    "- Assist ONLY with research tasks\n"
-                    "- After you're done with your tasks, respond to the supervisor directly\n"
-                    "- Respond ONLY with the results of your work, do NOT include ANY other text."
-            ),
-        )
-        vision_agent = create_react_agent(
-            helper_llm,
-            tools=vision_tools,
-            name="vision_agent",
-            prompt=(
-                    "You are a vision agent. You have access to the following tools: \n"
-                    "   query_image(query: str, image_url: str): \n"
-                    "       Args:\n"
-                    "           query (str): query on the image \n"
-                    "           image_url (str): link to the image \n"
-                    "       Returns:\n"
-                    "           response (str): response to the query after analyzing image \n\n"
-                    "   query_video(query: str, video_url: str): \n"
-                    "       Args:\n"
-                    "           query (str): query on the video\n"
-                    "           video_url (str): link to the video \n"
-                    "       Returns: \n"
-                    "           response (str): response to the query after analyzing video \n\n"
-                    "INSTRUCTIONS:\n"
-                    "- Assist ONLY with vision related tasks\n"
-                    "- After you're done with your tasks, respond to the supervisor directly\n"
-                    "- Respond ONLY with the results of your work, do NOT include ANY other text."
-            ),
-        )
-        python_agent = create_react_agent(
-            helper_llm,
-            tools=[python_repl_tool],
-            name="python_agent",
-            prompt=(
-                    "You are a python coding agent with access to a python REPL. You will be given a query and a link to a piece of python code. Retrieve and execute the linked code with python_repl tool to answer the query. \n\n"
-                    "INSTRUCTIONS:\n"
-                    "- Assist ONLY with python coding tasks\n"
-                    "- You are allowed to download files from given URLs \n"
-                    "- Do not execute code that can be harmful to host system \n"
-                    "- If there is Exception thrown during execution, try to debug your code, then execute again. \n"
-                    "- Always transfer any printed output from executed code to supervisor \n"
-                    "- After you're done with your tasks, respond to the supervisor directly\n"
-                    "- Respond ONLY with the results of your work, do NOT include ANY other text."
-            ),
-        )

 from qwen_vl_utils import process_vision_info
+from smolagents import Tool
+from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
+# from langchain_community.retrievers import WikipediaRetriever  # or your existing retriever
+class WikiSearchTool(Tool):
+    name = "wiki_search"
+    description = (
+        "Search Wikipedia for a query and return at most 3 results. "
+        "Args: query (str). Returns: search result as text."
+    )
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "Query string to search on Wikipedia."
+        }
+    }
+    output_type = "string"
+    def forward(self, query: str) -> str:
+        try:
+            # Use your existing WikipediaRetriever, make sure it's imported.
+            retriever = WikipediaRetriever(top_k_results=3)
+            wiki_result = retriever.invoke(query)
+            return wiki_result
+        except Exception as e:
+            return f"wiki_search failed {e}"
+class QueryImageTool(Tool):
+    name = "query_image"
+    description = (
+        "Analyze an image with a VLM given a natural language query and an image URL. "
+        "Args: query (str), image_url (str). Returns: textual response."
+    )
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "Question or instruction about the image."
+        },
+        "image_url": {
+            "type": "string",
+            "description": "Public URL of the image to analyze."
+        },
+    }
+    output_type = "string"
+    def forward(self, query: str, image_url: str) -> str:
+        try:
+            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                "Qwen/Qwen2.5-VL-3B-Instruct", torch_dtype="auto", device_map="auto"
+            )
+            processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image",
+                            "image": image_url,
+                            "max_pixels": 360 * 420,
+                        },
+                        {"type": "text", "text": query},
+                    ],
+                }
+            ]
+            text = processor.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+            image_inputs, video_inputs = process_vision_info(messages)
+            inputs = processor(
+                text=[text],
+                images=image_inputs,
+                videos=video_inputs,
+                padding=True,
+                return_tensors="pt",
+            )
+            inputs = inputs.to("cuda")
+            generated_ids = model.generate(**inputs, max_new_tokens=128)
+            generated_ids_trimmed = [
+                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+            ]
+            output_text = processor.batch_decode(
+                generated_ids_trimmed,
+                skip_special_tokens=True,
+                clean_up_tokenization_spaces=False,
+            )
+            # original function returned a list; keep or cast to str as you prefer
+            return output_text[0] if isinstance(output_text, list) else output_text
+        except Exception as e:
+            return f"query_image failed {e}"
+class ReverseStringTool(Tool):
+    name = "reverse_string"
+    description = (
+        "Reverse the characters of the given string and wrap it in an explanatory sentence."
+    )
+    inputs = {
+        "input_string": {
+            "type": "string",
+            "description": "String whose characters will be reversed."
+        }
+    }
+    output_type = "string"
+    def forward(self, input_string: str) -> str:
+        try:
+            reversed_string = input_string[::-1]
+            reversed_string = (
+                f"The reversed string returned from reverse_string function is: {reversed_string}"
+            )
+            return reversed_string
+        except Exception as e:
+            return f"reverse_string failed {e}"