AliA1997 commited on
Commit
8bed67e
·
1 Parent(s): 6619686

Completed some demos from huggingface tutorials.

Browse files
app.py CHANGED
@@ -1,77 +1,148 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
3
 
 
 
4
 
 
 
 
 
 
 
 
 
 
5
  def respond(
6
  message,
7
  history: list[dict[str, str]],
8
  system_message,
9
  max_tokens,
10
  temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
  ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
-
19
- messages = [{"role": "system", "content": system_message}]
20
-
21
- messages.extend(history)
22
-
23
- messages.append({"role": "user", "content": message})
24
-
25
- response = ""
26
-
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
-
47
- # Function to clear chat
48
- def clear_chat():
49
- return gr.update(value=[])
50
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  chatbot = gr.ChatInterface(
52
- respond,
53
  type="messages",
54
- additional_inputs=[
55
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
56
- ],
57
  )
58
 
 
59
  with gr.Blocks() as demo:
60
- with gr.Sidebar():
61
- gr.LoginButton()
62
 
63
  chatbot.render()
64
-
65
- # Define a button outside ChatInterface (but visually below)
66
- clear_button = gr.Button("🧹 Clear Chat", variant="secondary")
67
-
68
- # Attach event listener to the button
69
- clear_button.click(
70
- fn=clear_chat,
71
- inputs=None,
72
- outputs=chatbot
73
- )
74
-
75
-
 
 
 
 
76
  if __name__ == "__main__":
77
  demo.launch()
 
1
+ import json
2
  import gradio as gr
3
+ import os
4
+ from huggingface_hub import InferenceClient, login
5
+ from image_agent import init_stored_image_agent, stored_images
6
+ from dynamic_image_agent import main
7
+ from mulitagents import define_multi_agent
8
+ from nsfw_detection import classify_image_if_nsfw
9
 
10
# --- Credentials ---
# Log in to the Hugging Face Hub only when a token is actually configured,
# instead of handing `None` to `login`.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(hf_token)
else:
    print("⚠️ HF_TOKEN is not set; skipping Hugging Face Hub login.")

# os.environ only accepts str values, so a possibly-missing lookup must never
# be assigned back into it (that raises TypeError when the key is absent).
# The key is read from the environment as-is; just report when it is missing.
if not os.environ.get("OPENAI_API_KEY"):
    print("⚠️ OPENAI_API_KEY is not set; agents that call OpenAI are expected to fail.")

# --- Global state ---
active_agent = None  # will store which agent is currently selected
15
+
16
def init_and_extend_messages(system_msg: object, history: list[dict[str, str]]) -> list:
    """Build a message list that starts with the system message.

    Args:
        system_msg: Object placed first in the returned list.
        history: Earlier chat messages, appended in order after
            ``system_msg``. ``None`` or an empty list is treated as
            "no history".

    Returns:
        A new list; ``history`` itself is never mutated.
    """
    return [system_msg, *(history or [])]
20
+
21
# --- Respond function ---
def _as_chat_text(agent_result):
    """Convert an agent result into text for the chat window.

    Strings pass through unchanged; any other value is pretty-printed as
    JSON. Values JSON cannot encode yield a fixed error message.
    """
    if isinstance(agent_result, str):
        return agent_result
    try:
        return json.dumps(agent_result, indent=4)
    except (TypeError, ValueError):
        # json.dumps raises TypeError for unsupported types and ValueError
        # for circular references (JSONDecodeError is a ValueError subclass).
        print("Error parsing json response:", agent_result)
        return "Error generating response"


def respond(
    message,
    history: list[dict[str, str]],
    system_message=None,
    max_tokens=None,
    temperature=None,
    top_p=None,
):
    """Route the user message to the currently selected agent.

    Args:
        message: Text typed by the user (for the "nsfw check" agent it is
            passed on as the image URL).
        history: Previous chat messages; not used by any agent here.
        system_message, max_tokens, temperature, top_p: Accepted for
            signature compatibility and ignored. They default to ``None``
            so the function can be called with only ``message`` and
            ``history``, which is all a chat interface without additional
            inputs supplies.

    Returns:
        The agent's reply, or a warning/error string when no agent is
        selected, the selection is unknown, or the agent raised.
    """
    if not active_agent:
        return "⚠️ Please select an agent before chatting."

    try:
        # Route message to correct agent
        if active_agent == "stored":
            return init_stored_image_agent().run(
                f"""
                {message}
                """,
                images=stored_images,
            )

        if active_agent == "dynamic":
            # NOTE(review): main() is invoked without the user's message, so
            # this agent does not see what was typed — confirm intent.
            json_response = main()
            print("JSON RESPONSE:", json_response)
            return _as_chat_text(json_response)

        if active_agent == "multi":
            manager_agent = define_multi_agent()
            manager_agent.visualize()
            # The agent's reply is the only result used. No lookup of
            # python_executor.state["fig"] is made: its value was never
            # used, and a missing key would replace a successful reply
            # with an error message.
            return _as_chat_text(manager_agent.run(f"{message}"))

        if active_agent == "nsfw check":
            return json.dumps(classify_image_if_nsfw(message), indent=4)

        return "Unknown agent selected."
    except Exception as e:
        # Top-level boundary for the chat UI: report the failure as a reply
        # instead of letting the exception escape the handler.
        print("Exception:", str(e))
        return f"⚠️ Error: {e}"
92
+
93
+
94
# --- Button Handlers ---
def _select_agent(agent_key, label):
    """Store ``agent_key`` as the active agent and return a status line."""
    global active_agent
    active_agent = agent_key
    return f"✅ Switched to {label}."


def use_stored_image_agent():
    """Select the stored-image agent; returns the confirmation text."""
    return _select_agent("stored", "Stored Image Agent")


def use_dynamic_image_agent():
    """Select the dynamic (browser) image agent; returns the confirmation text."""
    return _select_agent("dynamic", "Dynamic Image Agent")


def use_multi_agent():
    """Select multi-agent mode; returns the confirmation text."""
    return _select_agent("multi", "Multi-Agent mode")


def use_nsfw_check():
    """Select NSFW-check mode; returns the confirmation text.

    Switching modes only records the selection. No sample image is
    classified here: such a call would download a fixed demo picture on
    every click, discard the result, and hide the confirmation message
    whenever the download failed.
    """
    return _select_agent("nsfw check", "NSFW Check mode")
118
+
119
+
120
# --- Chat Interface ---
# NOTE(review): the nesting below is reconstructed from a rendering that lost
# indentation — confirm that only the four buttons sit inside the row and that
# the status box and click wiring belong directly under gr.Blocks.
chatbot = gr.ChatInterface(fn=respond, type="messages", additional_inputs=[])

# --- Layout ---
with gr.Blocks() as demo:
    chatbot.render()
    gr.Markdown("### 🧩 Choose an Agent:")

    with gr.Row():
        stored_img_button = gr.Button("Checked Stored Superheros", variant="secondary")
        dynamic_img_button = gr.Button("Dynamically look for superheros", variant="primary")
        multi_agent_button = gr.Button("Search superhero's using multiple agents", variant="secondary")
        check_nsfw_button = gr.Button("NSFW Check on Image", variant="stop")

    # Read-only box that shows which agent was selected last.
    status_box = gr.Textbox(label="Agent Status", interactive=False)

    # Every button takes no inputs and writes its handler's message to the box.
    for _button, _handler in (
        (stored_img_button, use_stored_image_agent),
        (dynamic_img_button, use_dynamic_image_agent),
        (multi_agent_button, use_multi_agent),
        (check_nsfw_button, use_nsfw_check),
    ):
        _button.click(fn=_handler, inputs=None, outputs=status_box)

# --- Run app ---
if __name__ == "__main__":
    demo.launch()
debug_image.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9feb6223827c2f855913bfe56f43b8463ea61c448dede1765dd3cf92d1e1d549
3
+ size 92
dynamic_image_agent.py CHANGED
@@ -1,18 +1,19 @@
1
- from langchain_community.tools.ddg_search.tool import DuckDuckGoSearchTool
2
- from io import BytesIO
3
- from PIL import Image
4
  from time import sleep
 
5
 
6
- import helium
7
  from dotenv import load_dotenv
 
8
  from selenium import webdriver
9
  from selenium.webdriver.common.by import By
10
  from selenium.webdriver.common.keys import Keys
11
 
12
- from smolagents import tool, CodeAgent, OpenAIServerModel
13
  from smolagents.agents import ActionStep
14
  from smolagents.cli import load_model
15
-
16
 
17
  agent_request = """
18
  I am alfred, the butler of wayne manor, responsible for verifying the identity of guests at party. A superhero has arrived at the entrance claiming to be Wonderwomen, but I need to confirm if she is who she says she is.
@@ -74,35 +75,40 @@ def close_popups() -> str:
74
  """
75
  webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
76
 
77
- def save_screenshot(step_log: ActionStep, agent: CodeAgent) -> None:
78
- sleep(1.0)
79
- driver = helium.get_driver()
80
- current_step = step_log.step_number
81
- if driver is not None:
82
- for step_logs in agent.logs:
83
- if isinstance(step_log, ActionStep) and step_log.step_number <= current_step - 2:
84
- step_logs.observation_images = None
85
- png_bytes = driver.get_screenshot_as_png()
86
- image = Image.open(BytesIO(png_bytes))
87
- print(f"Captured a browser screenshot: {image.size} pixels")
88
- step_log.observation_images = image = [image.copy()]
89
-
90
- # Update observations with current URL
91
- url_info = f"Current url: {driver.current_url}"
92
- step_log.observations = url_info if step_logs.observations is None else step_log.observations + "\n" + url_info
93
- return
94
-
95
- model = OpenAIServerModel(model="gpt-4o")
96
-
97
- def initialize_agent(model: OpenAIServerModel):
98
- return CodeAgent(
99
- tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f, save_screenshot],
100
- model=model,
101
- additional_authorized_imports=["helium"],
102
- step_callbacks=[save_screenshot],
103
- max_steps=20,
104
- verbosity_level=2
105
- )
 
 
 
 
 
106
 
107
  def initialize_driver():
108
  """Initialize the Selenium WebDriver."""
@@ -159,6 +165,7 @@ Proceed in several steps rather than trying to solve the task in one shot.
159
  And at the end, only when you have your answer, return your final answer.
160
  Code:
161
  ```py
 
162
  final_answer("YOUR_ANSWER_HERE")
163
  ```<end_code>
164
  If pages seem stuck on loading, you might have to wait, for instance `import time` and run `time.sleep(5.0)`. But don't overuse this!
@@ -170,11 +177,18 @@ Don't kill the browser.
170
  When you have modals or cookie banners on screen, you should get rid of them before you can click anything else.
171
  """
172
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  def main():
175
- # Load environment variables
176
- # For example to use an OpenAI model, create a local .env file with OPENAI_API_KEY="<your_open_ai_key_here>"
177
- load_dotenv()
178
 
179
  # Parse command line arguments
180
  args = parse_arguments()
@@ -184,13 +198,12 @@ def main():
184
 
185
  global driver
186
  driver = initialize_driver()
187
- dynamic_image_agent = initialize_agent(model)
188
- dynamic_image_agent.run(agent_request)
189
 
190
  # Run the agent with the provided prompt
191
- dynamic_image_agent.python_executor("from helium import *")
192
- dynamic_image_agent.run(args.prompt + helium_instructions)
193
 
194
 
195
  if __name__ == "__main__":
196
- main()
 
1
+ import argparse
2
+ import os
3
+ import helium
4
  from time import sleep
5
+ from datetime import datetime
6
 
 
7
  from dotenv import load_dotenv
8
+ from PIL import Image
9
  from selenium import webdriver
10
  from selenium.webdriver.common.by import By
11
  from selenium.webdriver.common.keys import Keys
12
 
13
+ from smolagents import tool, CodeAgent, DuckDuckGoSearchTool, Model, InferenceClientModel
14
  from smolagents.agents import ActionStep
15
  from smolagents.cli import load_model
16
+ from io import BytesIO
17
 
18
  agent_request = """
19
  I am alfred, the butler of wayne manor, responsible for verifying the identity of guests at party. A superhero has arrived at the entrance claiming to be Wonderwomen, but I need to confirm if she is who she says she is.
 
75
  """
76
  webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
77
 
78
+
79
def save_screenshot(step):
    """Write a text record of an agent step under ``screenshots/``.

    Despite the name, no image is captured: the step's type, a timestamp,
    and the action's tool name, arguments and output are written to
    ``screenshots/agent_step_<timestamp>.txt``.

    Steps whose ``action`` attribute is missing or ``None``, or whose action
    has no ``output`` attribute, are ignored. Failures are printed and
    swallowed so that logging can never interrupt the agent (best effort).

    NOTE(review): confirm that the step objects handed to this callback
    really expose ``.action``; if they do not, nothing is ever written.

    Args:
        step: The step object passed to the callback.
    """
    try:
        action = getattr(step, "action", None)
        if action is None or not hasattr(action, "output"):
            return

        # Create screenshots directory if it doesn't exist
        os.makedirs("screenshots", exist_ok=True)

        # Read the clock once so the filename and the timestamp stored in
        # the file always refer to the same instant.
        now = datetime.now()
        filename = f"screenshots/agent_step_{now.strftime('%Y%m%d_%H%M%S_%f')}.txt"

        lines = [
            f"Step Type: {type(step).__name__}\n",
            f"Timestamp: {now.isoformat()}\n",
            "=" * 50 + "\n",
        ]
        # Tool name and arguments are optional on the action.
        if hasattr(action, "tool_name"):
            lines.append(f"Tool: {action.tool_name}\n")
        if hasattr(action, "arguments"):
            lines.append(f"Arguments: {action.arguments}\n")
        lines.append("\nOutput:\n")
        lines.append(str(action.output))

        with open(filename, "w", encoding="utf-8") as f:
            f.writelines(lines)

        print(f"Saved screenshot: {filename}")

    except Exception as e:
        print(f"Error saving screenshot: {e}")
111
+
112
 
113
  def initialize_driver():
114
  """Initialize the Selenium WebDriver."""
 
165
  And at the end, only when you have your answer, return your final answer.
166
  Code:
167
  ```py
168
+ Make the final answer format is only a string
169
  final_answer("YOUR_ANSWER_HERE")
170
  ```<end_code>
171
  If pages seem stuck on loading, you might have to wait, for instance `import time` and run `time.sleep(5.0)`. But don't overuse this!
 
177
  When you have modals or cookie banners on screen, you should get rid of them before you can click anything else.
178
  """
179
 
180
def initialize_agent(model):
    """Create the browsing CodeAgent that runs on ``model``.

    The agent receives a DuckDuckGo search tool plus the ``go_back``,
    ``close_popups`` and ``search_item_ctrl_f`` tools, may import
    ``helium`` in generated code, calls ``save_screenshot`` as a step
    callback, and is limited to 20 steps at verbosity level 2.
    """
    browser_tools = [DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f]
    agent = CodeAgent(
        tools=browser_tools,
        model=model,
        additional_authorized_imports=["helium"],
        step_callbacks=[save_screenshot],
        max_steps=20,
        verbosity_level=2,
    )
    return agent
190
 
191
  def main():
 
 
 
192
 
193
  # Parse command line arguments
194
  args = parse_arguments()
 
198
 
199
  global driver
200
  driver = initialize_driver()
201
+ agent = initialize_agent(model)
 
202
 
203
  # Run the agent with the provided prompt
204
+ agent.python_executor("from helium import *")
205
+ agent.run(args.prompt + helium_instructions)
206
 
207
 
208
  if __name__ == "__main__":
209
+ main()
find_batman_mobile_agent.py CHANGED
@@ -1,4 +1,7 @@
 
1
  import math
 
 
2
  from typing import Optional, Tuple
3
 
4
  from smolagents import tool
@@ -28,8 +31,8 @@ def calculate_cargo_travel_time(
28
  return degrees * (math.pi / 180.0)
29
 
30
  #extract coordinates
31
- lat1, lon1 = map(origin_coords, to_radians)
32
- lat2, lon2 = map(destination_coords, to_radians)
33
 
34
  # Earth Radius in kilometers
35
  EARTH_RADIUS_KM = 6371.0
@@ -55,7 +58,9 @@ def calculate_cargo_travel_time(
55
  # Format the results
56
  return round(flight_time, 2)
57
 
58
- print(calculate_cargo_travel_time(41.8781, -87.6298), (-33.8688, 151.2093))
 
 
59
 
60
 
61
  def check_reasoning_and_plot(final_answer, agent_memory):
 
1
+ import os
2
  import math
3
+ from PIL import Image
4
+ from io import BytesIO
5
  from typing import Optional, Tuple
6
 
7
  from smolagents import tool
 
31
  return degrees * (math.pi / 180.0)
32
 
33
  #extract coordinates
34
+ lat1, lon1 = map(to_radians, origin_coords)
35
+ lat2, lon2 = map(to_radians, destination_coords)
36
 
37
  # Earth Radius in kilometers
38
  EARTH_RADIUS_KM = 6371.0
 
58
  # Format the results
59
  return round(flight_time, 2)
60
 
61
+ print(
62
+ calculate_cargo_travel_time(origin_coords=(41.8781, -87.6298), destination_coords=(-33.8688, 151.2093))
63
+ )
64
 
65
 
66
  def check_reasoning_and_plot(final_answer, agent_memory):
image_agent.py CHANGED
@@ -1,37 +1,41 @@
1
- from PTL import find_batman_mobile_agent
 
2
  import requests
3
  from io import BytesIO
 
 
 
4
 
5
  image_urls = [
6
- "https://upload.wikimedia.org/wikipedia/commons/e/e8/The_Joker_at_Wax_Museum_Plus.jpg", # Joker image
7
- "https://upload.wikimedia.org/wikipedia/en/9/98/Joker_%28DC_Comics_character%29.jpg" # Joker image
8
  ]
9
 
10
- images = []
11
 
12
  for url in image_urls:
13
  headers = {
14
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
15
  }
16
  response = requests.get(url, headers)
17
- image = Image.open(BytesIO(response.content)).convert("RGB")
18
- images.append(image)
 
 
 
 
 
 
 
19
 
20
 
21
- # Now based on the images indicate if it's wonder women or joker.
22
- model = OpenAIServerModel(model="gpt-4o")
23
- agent = CodeAgent(
24
- tools=[],
25
- model=model,
26
- max_steps = 20,
27
- verbosity_level=2
28
- )
29
-
30
- response = agent.run(
31
- """
32
- Describe the costume and makeup that the comic characteer in these photos is wearing and return the description.
33
- Tell me if the guest is the Joker or Wonder Women
34
- """,
35
- images = images
36
- )
37
-
 
1
+ import os
2
+ import PIL
3
  import requests
4
  from io import BytesIO
5
+ from PIL import Image
6
+ from smolagents import CodeAgent, OpenAIServerModel, InferenceClientModel
7
+ from huggingface_hub import login
8
 
9
  image_urls = [
10
+ "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTcB9-ieIzm2uhtTkMkV5F50xbL4itvh3j8bg&s", # Joker image
11
+ "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR0SWQC74KuQWrJA33pJqUmOuDx62K7p1uEcQ&s" # Joker image
12
  ]
13
 
14
stored_images = []

# Identical for every request, so build it once outside the loop.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

for url in image_urls:
    try:
        # `headers` must be passed by keyword: the second positional
        # parameter of requests.get is `params`, which would append the
        # User-Agent to the query string instead of sending it as a header.
        # The timeout keeps a stalled download from hanging module import.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"⚠️ Could not download {url}: {exc}")
        continue

    try:
        image = Image.open(BytesIO(response.content)).convert("RGB")
    except PIL.UnidentifiedImageError:
        print("⚠️ The downloaded file is not a valid image.")
        # Save the raw bytes for inspection
        with open("debug_image.bin", "wb") as f:
            f.write(response.content)
    else:
        stored_images.append(image)
30
 
31
 
32
def init_stored_image_agent() -> CodeAgent:
    """Return a new tool-less CodeAgent backed by the ``gpt-4o`` model.

    The agent is limited to 5 steps and runs at verbosity level 2. Callers
    pass the images to inspect when they invoke ``run``.
    """
    return CodeAgent(
        tools=[],
        model=OpenAIServerModel(model_id="gpt-4o"),
        max_steps=5,
        verbosity_level=2,
    )
 
 
 
 
 
 
 
mulitagents.py CHANGED
@@ -4,80 +4,66 @@ from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, Vi
4
 
5
  from find_batman_mobile_agent import calculate_cargo_travel_time, check_reasoning_and_plot
6
 
7
- example_model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together")
8
 
9
- task = """Find all Batman filming locations in the world, calculate the time to transfer via cargo plane to here (we're in Gotham, 40.7128, 74.0060), and return them to me as a pandas dataframe.
10
- Also give me some supercar factories with the same cargo plane transfer time.
11
- """
12
 
13
- example_multi_agent = CodeAgent(
14
- model=example_model,
15
- tools=[DuckDuckGoSearchTool(), VisitWebpageTool(), calculate_cargo_travel_time()],
16
- additional_authorized_imports=["pandas"],
17
- max_steps=20
18
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- result = example_multi_agent.run(task)
21
-
22
- example_multi_agent.planning_interval = 4
23
-
24
- detailed_report = example_multi_agent.run(f"""
25
- You're an expert analyst. You make comprehensive reports after visiting many websites.
26
- Don't hesitate to search for many queries at once in a for loop.
27
- For each data point that you find, visit the source url to confirm numbers.
28
- """)
29
-
30
- print(detailed_report)
31
-
32
- example_web_model = InferenceClientModel(
33
- "Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=8096
34
- )
35
-
36
- example_web_agent = CodeAgent(
37
- model=example_web_model,
38
- tools=[
39
- DuckDuckGoSearchTool(),
40
- VisitWebpageTool(),
41
- calculate_cargo_travel_time(),
42
- ],
43
- name = "web_agent",
44
- description="Browses the web to find information",
45
- verbosity_level=0,
46
- max_steps=10
47
- )
48
-
49
- manager_agent = CodeAgent(
50
- model=InferenceClientModel("deepseek-ai/DeepSeek-R1", provider="together", max_tokens=8096),
51
- tools=[calculate_cargo_travel_time],
52
- managed_agents=[example_web_agent],
53
- additional_authorized_imports=[
54
- "geopandas",
55
- "plotly",
56
- "shapely",
57
- "json",
58
- "pandas",
59
- "numpy"
60
- ],
61
- planning_interval=5,
62
- verbosity_level=2,
63
- final_answer_checks=[check_reasoning_and_plot],
64
- max_steps=15
65
- )
66
-
67
- manager_agent.visualize()
68
-
69
- manager_agent.run("""
70
- Find all Batman filming locations in the world, calculate the time to transfer via cargo plane to here (we're in Gotham, 40.7128° N, 74.0060° W).
71
- Also give me some supercar factories with the same cargo plane transfer time. You need at least 6 points in total.
72
- Represent this as spatial map of the world, with the locations represented as scatter points with a color that depends on the travel time, and save it to saved_map.png!
73
-
74
- Here's an example of how to plot and return a map:
75
- import plotly.express as px
76
- df = px.data.carshare()
77
- fig = px.scatter_map(df, lat="centroid_lat", lon="centroid_lon", text="name", color="peak_hour", size=100
78
- color_continous_scale=px.colors.sequential.Magma, size_max=15, zoom=1)
79
-
80
- Never try to process strings using code: when you have a string to read, just print it and you'll see it.
81
- """)
82
-
83
- manager_agent.python_executor.state["fig"]
 
4
 
5
  from find_batman_mobile_agent import calculate_cargo_travel_time, check_reasoning_and_plot
6
 
 
7
 
 
 
 
8
 
9
def define_multi_agent(*, run_example: bool = False):
    """Build and return the manager agent of the multi-agent setup.

    The manager is a CodeAgent that owns ``calculate_cargo_travel_time``
    and delegates web browsing to a managed ``web_agent``. Its final
    answers are validated by ``check_reasoning_and_plot``.

    Args:
        run_example: When true, first run the stand-alone single-agent
            example (two agent runs, report printed). It is off by
            default because those runs are unrelated to the returned
            manager, their results are not used, and they would otherwise
            be repeated on every call.

    Returns:
        The configured manager agent; nothing is run on it here.
    """
    if run_example:
        _run_single_agent_example()

    example_web_model = InferenceClientModel(
        "Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=8096
    )

    example_web_agent = CodeAgent(
        model=example_web_model,
        tools=[
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            calculate_cargo_travel_time,
        ],
        name="web_agent",
        description="Browses the web to find information",
        verbosity_level=0,
        max_steps=10,
    )

    manager_agent = CodeAgent(
        model=InferenceClientModel("deepseek-ai/DeepSeek-R1", provider="together", max_tokens=8096),
        tools=[calculate_cargo_travel_time],
        managed_agents=[example_web_agent],
        additional_authorized_imports=[
            "geopandas",
            "plotly",
            "shapely",
            "json",
            "pandas",
            "numpy",
        ],
        planning_interval=5,
        verbosity_level=2,
        final_answer_checks=[check_reasoning_and_plot],
        max_steps=15,
    )
    return manager_agent


def _run_single_agent_example():
    """Run the single-agent Batman-locations example and print its report."""
    example_model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together")

    # Gotham's longitude is west of Greenwich, hence the negative sign.
    task = """Find all Batman filming locations in the world, calculate the time to transfer via cargo plane to here (we're in Gotham, 40.7128, -74.0060), and return them to me as a pandas dataframe.
    Also give me some supercar factories with the same cargo plane transfer time.
    """

    example_multi_agent = CodeAgent(
        model=example_model,
        tools=[DuckDuckGoSearchTool(), VisitWebpageTool(), calculate_cargo_travel_time],
        additional_authorized_imports=["pandas"],
        max_steps=20,
    )

    example_multi_agent.run(task)

    # The second run uses a planning interval of 4.
    example_multi_agent.planning_interval = 4

    detailed_report = example_multi_agent.run("""
    You're an expert analyst. You make comprehensive reports after visiting many websites.
    Don't hesitate to search for many queries at once in a for loop.
    For each data point that you find, visit the source url to confirm numbers.
    """)

    print(detailed_report)
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
nsfw_detection.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import requests
3
+ from io import BytesIO
4
+ import base64
5
+ import re
6
+ from transformers import pipeline
7
+
8
+ classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection")
9
+
10
+
11
def classify_image_if_nsfw(image_url: str, timeout: float = 30.0):
    """Classify an image with the module-level ``classifier`` pipeline.

    Args:
        image_url: Either a ``data:image/<type>;base64,<payload>`` URL or a
            regular URL that is downloaded with ``requests``.
        timeout: Seconds to wait for the download of a regular URL, so a
            stalled server cannot block the caller indefinitely.

    Returns:
        Whatever ``classifier`` returns for the image.

    Raises:
        ValueError: If a ``data:image`` URL is not in base64 form.
        Exception: Any download, decoding or classification error is
            printed and re-raised unchanged.
    """
    try:
        # Check if it's a base64 data URL
        if image_url.startswith('data:image'):
            print("Processing base64 data URL")

            # DOTALL lets the payload span several lines; without it the
            # data group would stop at the first newline and the image
            # would be silently truncated.
            match = re.fullmatch(
                r'data:image/(?P<ext>[\w.+-]+);base64,(?P<data>.*)',
                image_url,
                re.DOTALL,
            )
            if not match:
                raise ValueError("Invalid base64 data URL format")

            # Decode the payload and open the image from the raw bytes
            image_data = base64.b64decode(match.group('data'))
            img = Image.open(BytesIO(image_data))

        else:
            # It's a regular URL - download the image
            print("Processing regular URL")
            response = requests.get(image_url, timeout=timeout)
            response.raise_for_status()

            # Open and process the image
            img = Image.open(BytesIO(response.content))

        print("Image size:", img.size)
        print("Image format:", img.format)
        print("Image mode:", img.mode)

        # The classifier is always given an RGB image
        if img.mode != 'RGB':
            img = img.convert('RGB')

        # Classify the image
        classifier_response = classifier(img)
        print("Classifier Response:", classifier_response)

        return classifier_response

    except Exception as e:
        print(f"Error processing image: {e}")
        raise
57
+
58
+ # Example usage with both types:
59
+ # Regular URL
60
+ # result1 = classify_image_if_nsfw("https://example.com/image.jpg")
61
+
62
+ # Base64 data URL (you would use an actual base64 string here)
63
+ # result2 = classify_image_if_nsfw("data:image/jpeg;base64,/9j/4AAQSkZJRgABAQ...")
requirements.txt CHANGED
@@ -1,8 +1,15 @@
1
- gradio
2
- huggingface_hub
3
- smolagents[all]
4
- requests
5
- pillow
6
- selenium
7
- helium
8
- langchain-community
 
 
 
 
 
 
 
 
1
+ gradio~=5.49.1
2
+ huggingface_hub~=1.0.1
3
+ smolagents[all]~=1.22.0
4
+ requests~=2.32.4
5
+ pillow~=11.3.0
6
+ selenium~=4.38.0
7
+ helium~=5.1.1
8
+ langchain-community~=0.3.27
9
+ litellm
10
+ transformers~=4.53.2
11
+ python-dotenv~=1.1.1
12
+ plotly
13
+ shapely
14
+ geopandas
15
+ markdownify