PolPC13 committed on
Commit
6025aa5
·
1 Parent(s): 4d31f6b

Modified prompts.yaml.

Browse files
Files changed (3) hide show
  1. app.py +5 -2
  2. prompts.yaml +4 -0
  3. tools/new_tools.py +107 -173
app.py CHANGED
@@ -8,6 +8,8 @@ from tools.exchange_rates import ExchangeRatesTool
8
  from tools.web_search import ddgs
9
  from tools.visit_webpage import VisitWebpageTool
10
  from Gradio_UI import GradioUI
 
 
11
 
12
  final_answer = FinalAnswerTool()
13
 
@@ -30,7 +32,7 @@ with open("prompts.yaml", 'r') as stream:
30
 
31
  agent = CodeAgent(
32
  model=model,
33
- tools=[final_answer, ddgs(), VisitWebpageTool(), ExchangeRatesTool()], ## add your tools here (don't remove final answer)
34
  max_steps=6,
35
  verbosity_level=1,
36
  grammar=None,
@@ -41,4 +43,5 @@ agent = CodeAgent(
41
  )
42
 
43
 
44
- GradioUI(agent).launch()
 
 
8
# app.py (updated excerpt): tool imports, agent construction, and launch.
from tools.web_search import ddgs
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
# new_tools.py lives in the tools/ package (see "tools/new_tools.py" in this
# commit), so it must be imported from there, not as a top-level module.
from tools.new_tools import QueryImageTool, ReverseStringTool, WikiSearchTool
from tools.get_current_time import GetCurrentTimeTool

final_answer = FinalAnswerTool()

agent = CodeAgent(
    model=model,
    # WikiSearchTool must be instantiated like every other tool here;
    # passing the bare class would hand smolagents an uncallable tool entry.
    # NOTE(review): image_generation_tool is not imported or defined in this
    # diff — confirm it is created elsewhere in app.py before launch.
    tools=[
        final_answer,
        ddgs(),
        VisitWebpageTool(),
        ExchangeRatesTool(),
        WikiSearchTool(),
        QueryImageTool(),
        ReverseStringTool(),
        GetCurrentTimeTool(),
        image_generation_tool,
    ],  ## add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
)

# Guard the launch so importing app.py (e.g. from tests) does not start the UI.
if __name__ == "__main__":
    GradioUI(agent).launch()
prompts.yaml CHANGED
@@ -8,6 +8,10 @@
8
  During each intermediate step, you can use 'print()' to save whatever important information you will then need.
9
  These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
10
  In the end you have to return a final answer using the `final_answer` tool.
 
 
 
 
11
 
12
  Here are a few examples using notional tools:
13
  ---
 
8
  During each intermediate step, you can use 'print()' to save whatever important information you will then need.
9
  These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
10
  In the end you have to return a final answer using the `final_answer` tool.
11
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
12
+ If you are asked for a number, don't use commas in your number, nor units such as $ or percent signs, unless specified otherwise.
13
+ If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise.
14
+ If you are asked for a comma separated list, apply the above rules depending on whether each element of the list is a number or a string.
15
 
16
  Here are a few examples using notional tools:
17
  ---
tools/new_tools.py CHANGED
@@ -1,187 +1,121 @@
1
- from langchain_core.messages import HumanMessage
2
- from langchain_core.tools import tool, Tool
3
- from langchain_together import ChatTogether
4
- from langgraph.prebuilt import create_react_agent
5
- from langchain_community.retrievers import WikipediaRetriever
6
- from langchain_community.tools import BraveSearch
7
- from langchain_experimental.utilities import PythonREPL
8
- from langchain_community.agent_toolkits.load_tools import load_tools
9
- import requests
10
- from langgraph_supervisor import create_supervisor
11
- from youtube_transcript_api import YouTubeTranscriptApi
12
- from pytubefix import extract, YouTube
13
- import whisper
14
  from qwen_vl_utils import process_vision_info
15
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
 
16
 
17
- @tool
18
- def wiki_search(query: str) -> str:
19
- """Search Wikipedia for query and return maximum 3 results
20
-
21
- Args:
22
- query (str): query to search on Wikipedia
23
- Returns:
24
- wiki_result (str): result of search
25
- """
26
- try:
27
- retriever = WikipediaRetriever()
28
- wiki_result = retriever.invoke(query)
29
- return wiki_result
30
- except Exception as e:
31
- return f"wiki_search failed {e}"
32
 
33
- @tool
34
- def query_image(query: str, image_url: str):
35
- """Analyze the query on an image using a VLM
36
-
37
- Args:
38
- query (str): query about the image
39
- image_url (str): link to the image
40
- Returns:
41
- response (str): response to the query on image
42
- """
43
- try:
44
- model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
45
- "Qwen/Qwen2.5-VL-3B-Instruct", torch_dtype="auto", device_map="auto"
46
- )
47
 
48
- processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
 
 
 
 
 
 
 
49
 
50
- messages = [
51
- {
52
- "role": "user",
53
- "content": [
54
- {
55
- "type": "image",
56
- "image": image_url,
57
- "max_pixels": 360 * 420,
58
- },
59
- {"type": "text", "text": query},
60
- ],
61
- }
62
- ]
63
 
64
- text = processor.apply_chat_template(
65
- messages, tokenize=False, add_generation_prompt=True
66
- )
67
- image_inputs, video_inputs = process_vision_info(messages)
68
- inputs = processor(
69
- text=[text],
70
- images=image_inputs,
71
- videos=video_inputs,
72
- padding=True,
73
- return_tensors="pt",
74
- )
75
- inputs = inputs.to("cuda")
 
 
 
 
 
76
 
77
- # Inference
78
- generated_ids = model.generate(**inputs, max_new_tokens=128)
79
- generated_ids_trimmed = [
80
- out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
81
- ]
82
- output_text = processor.batch_decode(
83
- generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
84
- )
85
- print(output_text)
86
- return output_text
87
 
88
- except Exception as e:
89
- return f"query_image failed {e}"
90
 
91
- @tool
92
- def reverse_string(input_string: str) -> str:
93
- """Reverse the character order of input string.
94
-
95
- Args:
96
- input_string (str): string to reverse
97
- Returns:
98
- reversed_string (str): reversed string
99
- """
100
- try:
101
- reversed_string = input_string[::-1]
102
- reversed_string = f"The reversed string returned from reverse_string function is: {reversed_string}"
103
- return reversed_string
104
- except Exception as e:
105
- return f"reverse_string failed {e}"
106
 
107
- repl = PythonREPL()
108
- python_repl_tool = Tool(
109
- name="python_repl",
110
- description="""A Python shell. Use this to execute python commands.
111
- Input should be a valid python command.
112
- Input should be a valid Python expression or script.
113
- If you want to see the output of a value, you should print it out with `print(...)`.
114
- Always return the printed code output.
115
- Example: print(2 + 2) → will return 4
116
- Do NOT execute code that could be harmful to the host system.
117
- You are allowed to download files from URLs.""",
118
- func=repl.run
119
- )
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
- class langgraph_agent:
123
- def __init__(self):
124
- llm = ChatTogether(
125
- model="Qwen/Qwen3-235B-A22B-fp8-tput",
126
- temperature=0
127
- )
128
 
129
- helper_llm = ChatTogether(
130
- model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
131
- temperature=0
132
- )
 
 
 
 
 
 
 
 
133
 
134
- research_agent = create_react_agent(
135
- llm,
136
- tools=research_tools,
137
- name="research_agent",
138
- prompt=(
139
- "You are a research agent. You have access to web_search tool to search the web, wiki_search tool to search wikipedia\n\n"
140
- "INSTRUCTIONS:\n"
141
- "- Assist ONLY with research tasks\n"
142
- "- After you're done with your tasks, respond to the supervisor directly\n"
143
- "- Respond ONLY with the results of your work, do NOT include ANY other text."
144
- ),
145
- )
146
-
147
- vision_agent = create_react_agent(
148
- helper_llm,
149
- tools=vision_tools,
150
- name="vision_agent",
151
- prompt=(
152
- "You are a vision agent. You have access to the following tools: \n"
153
- " query_image(query: str, image_url: str): \n"
154
- " Args:\n"
155
- " query (str): query on the image \n"
156
- " image_url (str): link to the image \n"
157
- " Returns:\n"
158
- " response (str): response to the query after analyzing image \n\n"
159
- " query_video(query: str, video_url: str): \n"
160
- " Args:\n"
161
- " query (str): query on the video\n"
162
- " video_url (str): link to the video \n"
163
- " Returns: \n"
164
- " response (str): response to the query after analyzing video \n\n"
165
- "INSTRUCTIONS:\n"
166
- "- Assist ONLY with vision related tasks\n"
167
- "- After you're done with your tasks, respond to the supervisor directly\n"
168
- "- Respond ONLY with the results of your work, do NOT include ANY other text."
169
- ),
170
- )
171
-
172
- python_agent = create_react_agent(
173
- helper_llm,
174
- tools=[python_repl_tool],
175
- name="python_agent",
176
- prompt=(
177
- "You are a python coding agent with access to a python REPL. You will be given a query and a link to a piece of python code. Retrieve and execute the linked code with python_repl tool to answer the query. \n\n"
178
- "INSTRUCTIONS:\n"
179
- "- Assist ONLY with python coding tasks\n"
180
- "- You are allowed to download files from given URLs \n"
181
- "- Do not execute code that can be harmful to host system \n"
182
- "- If there is Exception thrown during execution, try to debug your code, then execute again. \n"
183
- "- Always transfer any printed output from executed code to supervisor \n"
184
- "- After you're done with your tasks, respond to the supervisor directly\n"
185
- "- Respond ONLY with the results of your work, do NOT include ANY other text."
186
- ),
187
- )
 
1
from smolagents import Tool
# Qwen2_5_VLForConditionalGeneration is exported by transformers, not
# qwen_vl_utils (the pre-commit code imported it from transformers too).
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from qwen_vl_utils import process_vision_info
# Required by WikiSearchTool.forward; leaving it commented out raises
# NameError on the first wiki_search call.
from langchain_community.retrievers import WikipediaRetriever
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
class WikiSearchTool(Tool):
    """smolagents tool that searches Wikipedia and returns results as text."""

    name = "wiki_search"
    description = (
        "Search Wikipedia for a query and return at most 3 results. "
        "Args: query (str). Returns: search result as text."
    )
    inputs = {
        "query": {
            "type": "string",
            "description": "Query string to search on Wikipedia."
        }
    }
    output_type = "string"

    def forward(self, query: str) -> str:
        """Return up to 3 Wikipedia results for *query* as a single string.

        Never raises: any failure is reported as an error string so the
        agent loop can continue after a bad search.
        """
        try:
            retriever = WikipediaRetriever(top_k_results=3)
            docs = retriever.invoke(query)
            # invoke() returns a list of Document objects, but output_type
            # declares "string" — join the page contents into one text blob
            # instead of handing the agent a repr of the list.
            return "\n\n".join(doc.page_content for doc in docs)
        except Exception as e:
            return f"wiki_search failed {e}"
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
class QueryImageTool(Tool):
    """smolagents tool: visual question answering on one image via Qwen2.5-VL."""

    name = "query_image"
    description = (
        "Analyze an image with a VLM given a natural language query and an image URL. "
        "Args: query (str), image_url (str). Returns: textual response."
    )
    inputs = {
        "query": {
            "type": "string",
            "description": "Question or instruction about the image."
        },
        "image_url": {
            "type": "string",
            "description": "Public URL of the image to analyze."
        },
    }
    output_type = "string"

    def forward(self, query: str, image_url: str) -> str:
        """Run the VLM on (query, image_url) and return its text answer.

        Never raises: failures are reported as an error string so the agent
        loop can keep going.
        """
        try:
            # NOTE(review): the model and processor are reloaded on every
            # call, which is slow; consider caching them on self if this
            # tool is invoked repeatedly.
            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
                "Qwen/Qwen2.5-VL-3B-Instruct", torch_dtype="auto", device_map="auto"
            )
            processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")

            messages = [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "image": image_url,
                            # Cap resolution to bound memory and latency.
                            "max_pixels": 360 * 420,
                        },
                        {"type": "text", "text": query},
                    ],
                }
            ]

            text = processor.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            image_inputs, video_inputs = process_vision_info(messages)
            inputs = processor(
                text=[text],
                images=image_inputs,
                videos=video_inputs,
                padding=True,
                return_tensors="pt",
            )
            # Follow the device picked by device_map="auto" instead of
            # hard-coding "cuda", which crashes on CPU-only hosts.
            inputs = inputs.to(model.device)

            generated_ids = model.generate(**inputs, max_new_tokens=128)
            # Strip the prompt tokens so only the generated answer is decoded.
            generated_ids_trimmed = [
                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
            ]
            output_text = processor.batch_decode(
                generated_ids_trimmed,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )
            # batch_decode returns a list of strings; unwrap the single item
            # to honor output_type = "string".
            return output_text[0] if isinstance(output_text, list) else output_text
        except Exception as e:
            return f"query_image failed {e}"
98
 
 
 
 
 
 
 
99
 
100
class ReverseStringTool(Tool):
    """smolagents tool that reverses a string and reports it in a sentence."""

    name = "reverse_string"
    description = (
        "Reverse the characters of the given string and wrap it in an explanatory sentence."
    )
    inputs = {
        "input_string": {
            "type": "string",
            "description": "String whose characters will be reversed."
        }
    }
    output_type = "string"

    def forward(self, input_string: str) -> str:
        """Return *input_string* reversed, embedded in a fixed sentence.

        Errors (e.g. a non-sliceable input) are returned as an error
        string rather than raised.
        """
        try:
            flipped = "".join(reversed(input_string))
            return f"The reversed string returned from reverse_string function is: {flipped}"
        except Exception as e:
            return f"reverse_string failed {e}"