Spaces:
Build error
Build error
AliA1997
committed on
Commit
·
8bed67e
1
Parent(s):
6619686
Completed some demos from huggingface tutorials.
Browse files- app.py +129 -58
- debug_image.bin +3 -0
- dynamic_image_agent.py +56 -43
- find_batman_mobile_agent.py +8 -3
- image_agent.py +27 -23
- mulitagents.py +60 -74
- nsfw_detection.py +63 -0
- requirements.txt +15 -8
app.py
CHANGED
|
@@ -1,77 +1,148 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
|
|
|
|
|
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
def respond(
|
| 6 |
message,
|
| 7 |
history: list[dict[str, str]],
|
| 8 |
system_message,
|
| 9 |
max_tokens,
|
| 10 |
temperature,
|
| 11 |
-
top_p
|
| 12 |
-
hf_token: gr.OAuthToken,
|
| 13 |
):
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
""
|
| 44 |
-
|
| 45 |
-
""
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
chatbot = gr.ChatInterface(
|
| 52 |
-
respond,
|
| 53 |
type="messages",
|
| 54 |
-
additional_inputs=[
|
| 55 |
-
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
| 56 |
-
],
|
| 57 |
)
|
| 58 |
|
|
|
|
| 59 |
with gr.Blocks() as demo:
|
| 60 |
-
with gr.Sidebar():
|
| 61 |
-
gr.LoginButton()
|
| 62 |
|
| 63 |
chatbot.render()
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
if __name__ == "__main__":
|
| 77 |
demo.launch()
|
|
|
|
| 1 |
+
import json
|
| 2 |
import gradio as gr
|
| 3 |
+
import os
|
| 4 |
+
from huggingface_hub import InferenceClient, login
|
| 5 |
+
from image_agent import init_stored_image_agent, stored_images
|
| 6 |
+
from dynamic_image_agent import main
|
| 7 |
+
from mulitagents import define_multi_agent
|
| 8 |
+
from nsfw_detection import classify_image_if_nsfw
|
| 9 |
|
| 10 |
+
# --- Credentials ---
# Log in to the Hugging Face Hub only when a token is configured. Passing
# None to login() asks for the token interactively, which a server process
# cannot answer (see the huggingface_hub login documentation).
_hf_token = os.environ.get("HF_TOKEN")
if _hf_token:
    login(_hf_token)
else:
    print("⚠️ HF_TOKEN is not set; skipping Hugging Face Hub login.")

# OPENAI_API_KEY is already read from the environment, so copying it onto
# itself adds nothing; assigning None to os.environ raises TypeError when the
# variable is missing. Warn instead of crashing at import time.
if not os.environ.get("OPENAI_API_KEY"):
    print("⚠️ OPENAI_API_KEY is not set; OpenAI-backed agents will fail.")

# --- Global state ---
active_agent = None  # will store which agent is currently selected
|
| 15 |
+
|
| 16 |
+
def init_and_extend_messages(system_msg: object, history: list[dict[str, str]]):
    """Return a new message list: the system message followed by the history.

    Args:
        system_msg: Message placed first in the returned list.
        history: Earlier chat messages, kept in their existing order.

    Returns:
        A freshly built list; ``history`` itself is not modified.
    """
    return [system_msg, *history]
|
| 20 |
+
|
| 21 |
+
# --- Respond function ---
|
| 22 |
def respond(
    message,
    history: list[dict[str, str]],
    system_message=None,
    max_tokens=None,
    temperature=None,
    top_p=None,
):
    """Route the user message to the currently selected agent.

    Args:
        message: Text typed by the user. In "nsfw check" mode it is passed
            on as the image URL to classify; the "dynamic" agent ignores it.
        history: Previous chat messages; not used by any agent here.
        system_message: Unused; kept for signature compatibility.
        max_tokens: Unused; kept for signature compatibility.
        temperature: Unused; kept for signature compatibility.
        top_p: Unused; kept for signature compatibility.

    The four trailing parameters default to ``None`` because the chat
    interface in this file is created with an empty ``additional_inputs``
    list, so only ``message`` and ``history`` are supplied by the UI.

    Returns:
        The text to display in the chat: the agent's answer (JSON-formatted
        when it is not already a string), a hint when no agent has been
        selected, or an error description when the agent raised.
    """
    if not active_agent:
        return "⚠️ Please select an agent before chatting."

    # Top-level UI boundary: any agent failure is reported in the chat
    # instead of propagating into Gradio.
    try:
        if active_agent == "stored":
            result = init_stored_image_agent().run(
                f"\n{message}\n",
                images=stored_images,
            )
            return _format_agent_response(result)

        if active_agent == "dynamic":
            result = main()
            print("JSON RESPONSE:", result)
            return _format_agent_response(result)

        if active_agent == "multi":
            manager_agent = define_multi_agent()
            manager_agent.visualize()
            # The former bare ``python_executor.state["fig"]`` lookup had no
            # effect except raising KeyError when the agent produced no
            # figure, which threw away an otherwise valid answer.
            return _format_agent_response(manager_agent.run(f"{message}"))

        if active_agent == "nsfw check":
            return _format_agent_response(classify_image_if_nsfw(message))

        return "Unknown agent selected."
    except Exception as e:
        print("Exception:", str(e))
        return f"⚠️ Error: {e}"


def _format_agent_response(result):
    """Convert an agent result into text that the chat UI can display."""
    if isinstance(result, str):
        return result
    try:
        return json.dumps(result, indent=4)
    except (TypeError, ValueError):
        # json.dumps raises TypeError for objects it cannot serialise and
        # ValueError for circular references; it never raises JSONDecodeError.
        print("Error serializing agent response:", result)
        return "Error generating response"
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# --- Button Handlers ---
|
| 95 |
+
def use_stored_image_agent():
    """Make the stored-image agent the active one and report the switch."""
    global active_agent
    selected_mode = "stored"
    active_agent = selected_mode
    return "✅ Switched to Stored Image Agent."
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def use_dynamic_image_agent():
    """Make the dynamic-image agent the active one and report the switch."""
    global active_agent
    selected_mode = "dynamic"
    active_agent = selected_mode
    return "✅ Switched to Dynamic Image Agent."
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def use_multi_agent():
    """Make the multi-agent setup the active one and report the switch."""
    global active_agent
    selected_mode = "multi"
    active_agent = selected_mode
    return "✅ Switched to Multi-Agent mode."
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def use_nsfw_check():
    """Make the NSFW checker the active mode and report the switch.

    A sample image URL is classified as a smoke test; its result is not
    used. The check is best-effort: a failure is printed but must not stop
    the switch, because the mode has already been changed and the UI still
    needs its status message.

    Returns:
        The confirmation text shown in the status box.
    """
    global active_agent
    active_agent = "nsfw check"
    try:
        classify_image_if_nsfw("https://static.api4.ai/api4.ai/nsfw/demo-pic-1.jpg")
    except Exception as exc:
        print("NSFW classifier smoke test failed:", exc)
    return "✅ Switched to NSFW Check mode."
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
# --- Chat Interface ---
|
| 121 |
# Chat UI backed by respond(). No extra input widgets are registered
# (additional_inputs is empty); the interface is rendered later inside the
# Blocks layout via chatbot.render().
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=[]
)
|
| 126 |
|
| 127 |
+
# --- Layout ---
|
| 128 |
with gr.Blocks() as demo:

    # Chat window first, then the agent-selection controls.
    chatbot.render()
    gr.Markdown("### 🧩 Choose an Agent:")
    with gr.Row():
        # One button per agent mode.
        stored_img_button = gr.Button("Checked Stored Superheros", variant="secondary")
        dynamic_img_button = gr.Button("Dynamically look for superheros", variant="primary")
        multi_agent_button = gr.Button("Search superhero's using multiple agents", variant="secondary")
        check_nsfw_button = gr.Button("NSFW Check on Image", variant="stop")

    # Display agent switch confirmation message
    status_box = gr.Textbox(label="Agent Status", interactive=False)

    # Each handler sets the module-level active_agent and returns the text
    # that is written into status_box.
    stored_img_button.click(fn=use_stored_image_agent, inputs=None, outputs=status_box)
    dynamic_img_button.click(fn=use_dynamic_image_agent, inputs=None, outputs=status_box)
    multi_agent_button.click(fn=use_multi_agent, inputs=None, outputs=status_box)
    check_nsfw_button.click(fn= use_nsfw_check, inputs=None, outputs=status_box)

# --- Run app ---
if __name__ == "__main__":
    demo.launch()
|
debug_image.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9feb6223827c2f855913bfe56f43b8463ea61c448dede1765dd3cf92d1e1d549
|
| 3 |
+
size 92
|
dynamic_image_agent.py
CHANGED
|
@@ -1,18 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
from time import sleep
|
|
|
|
| 5 |
|
| 6 |
-
import helium
|
| 7 |
from dotenv import load_dotenv
|
|
|
|
| 8 |
from selenium import webdriver
|
| 9 |
from selenium.webdriver.common.by import By
|
| 10 |
from selenium.webdriver.common.keys import Keys
|
| 11 |
|
| 12 |
-
from smolagents import tool, CodeAgent,
|
| 13 |
from smolagents.agents import ActionStep
|
| 14 |
from smolagents.cli import load_model
|
| 15 |
-
|
| 16 |
|
| 17 |
agent_request = """
|
| 18 |
I am alfred, the butler of wayne manor, responsible for verifying the identity of guests at party. A superhero has arrived at the entrance claiming to be Wonderwomen, but I need to confirm if she is who she says she is.
|
|
@@ -74,35 +75,40 @@ def close_popups() -> str:
|
|
| 74 |
"""
|
| 75 |
webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
if
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
def initialize_driver():
|
| 108 |
"""Initialize the Selenium WebDriver."""
|
|
@@ -159,6 +165,7 @@ Proceed in several steps rather than trying to solve the task in one shot.
|
|
| 159 |
And at the end, only when you have your answer, return your final answer.
|
| 160 |
Code:
|
| 161 |
```py
|
|
|
|
| 162 |
final_answer("YOUR_ANSWER_HERE")
|
| 163 |
```<end_code>
|
| 164 |
If pages seem stuck on loading, you might have to wait, for instance `import time` and run `time.sleep(5.0)`. But don't overuse this!
|
|
@@ -170,11 +177,18 @@ Don't kill the browser.
|
|
| 170 |
When you have modals or cookie banners on screen, you should get rid of them before you can click anything else.
|
| 171 |
"""
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
def main():
|
| 175 |
-
# Load environment variables
|
| 176 |
-
# For example to use an OpenAI model, create a local .env file with OPENAI_API_KEY="<your_open_ai_key_here>"
|
| 177 |
-
load_dotenv()
|
| 178 |
|
| 179 |
# Parse command line arguments
|
| 180 |
args = parse_arguments()
|
|
@@ -184,13 +198,12 @@ def main():
|
|
| 184 |
|
| 185 |
global driver
|
| 186 |
driver = initialize_driver()
|
| 187 |
-
|
| 188 |
-
dynamic_image_agent.run(agent_request)
|
| 189 |
|
| 190 |
# Run the agent with the provided prompt
|
| 191 |
-
|
| 192 |
-
|
| 193 |
|
| 194 |
|
| 195 |
if __name__ == "__main__":
|
| 196 |
-
main()
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
import helium
|
| 4 |
from time import sleep
|
| 5 |
+
from datetime import datetime
|
| 6 |
|
|
|
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
+
from PIL import Image
|
| 9 |
from selenium import webdriver
|
| 10 |
from selenium.webdriver.common.by import By
|
| 11 |
from selenium.webdriver.common.keys import Keys
|
| 12 |
|
| 13 |
+
from smolagents import tool, CodeAgent, DuckDuckGoSearchTool, Model, InferenceClientModel
|
| 14 |
from smolagents.agents import ActionStep
|
| 15 |
from smolagents.cli import load_model
|
| 16 |
+
from io import BytesIO
|
| 17 |
|
| 18 |
agent_request = """
|
| 19 |
I am alfred, the butler of wayne manor, responsible for verifying the identity of guests at party. A superhero has arrived at the entrance claiming to be Wonderwomen, but I need to confirm if she is who she says she is.
|
|
|
|
| 75 |
"""
|
| 76 |
webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
|
| 77 |
|
| 78 |
+
|
| 79 |
+
def save_screenshot(step):
    """Write a text summary of an agent step into the ``screenshots`` folder.

    Despite the name, no image is captured: the step's type, a timestamp,
    the optional tool name and arguments, and the action output are written
    to ``screenshots/agent_step_<timestamp>.txt``.

    Nothing is written unless ``step.action`` exists, is not ``None`` and
    has an ``output`` attribute. Any exception is caught and printed.

    Args:
        step: The step object handed to the callback.
    """
    try:
        action = getattr(step, "action", None)
        if action is None or not hasattr(action, "output"):
            return

        os.makedirs("screenshots", exist_ok=True)

        # Microsecond-resolution stamp for the file name.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = f"screenshots/agent_step_{stamp}.txt"

        with open(filename, "w", encoding="utf-8") as report:
            print(f"Step Type: {type(step).__name__}", file=report)
            print(f"Timestamp: {datetime.now().isoformat()}", file=report)
            print("=" * 50, file=report)

            # Optional action details, written only when present.
            for label, attr in (("Tool", "tool_name"), ("Arguments", "arguments")):
                if hasattr(action, attr):
                    print(f"{label}: {getattr(action, attr)}", file=report)

            print("\nOutput:", file=report)
            report.write(str(action.output))

        print(f"Saved screenshot: {filename}")
    except Exception as err:
        print(f"Error saving screenshot: {err}")
|
| 111 |
+
|
| 112 |
|
| 113 |
def initialize_driver():
|
| 114 |
"""Initialize the Selenium WebDriver."""
|
|
|
|
| 165 |
And at the end, only when you have your answer, return your final answer.
|
| 166 |
Code:
|
| 167 |
```py
|
| 168 |
+
Make the final answer format is only a string
|
| 169 |
final_answer("YOUR_ANSWER_HERE")
|
| 170 |
```<end_code>
|
| 171 |
If pages seem stuck on loading, you might have to wait, for instance `import time` and run `time.sleep(5.0)`. But don't overuse this!
|
|
|
|
| 177 |
When you have modals or cookie banners on screen, you should get rid of them before you can click anything else.
|
| 178 |
"""
|
| 179 |
|
| 180 |
+
def initialize_agent(model):
    """Build the browsing CodeAgent around the given model.

    The agent gets a DuckDuckGo search tool plus the ``go_back``,
    ``close_popups`` and ``search_item_ctrl_f`` tools, may import
    ``helium`` in generated code, calls ``save_screenshot`` after each
    step, and is limited to 20 steps at verbosity level 2.

    Args:
        model: Model instance handed to the agent.

    Returns:
        The configured ``CodeAgent``.
    """
    browsing_tools = [DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f]
    agent = CodeAgent(
        tools=browsing_tools,
        model=model,
        additional_authorized_imports=["helium"],
        step_callbacks=[save_screenshot],
        max_steps=20,
        verbosity_level=2,
    )
    return agent
|
| 190 |
|
| 191 |
def main():
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
# Parse command line arguments
|
| 194 |
args = parse_arguments()
|
|
|
|
| 198 |
|
| 199 |
global driver
|
| 200 |
driver = initialize_driver()
|
| 201 |
+
agent = initialize_agent(model)
|
|
|
|
| 202 |
|
| 203 |
# Run the agent with the provided prompt
|
| 204 |
+
agent.python_executor("from helium import *")
|
| 205 |
+
agent.run(args.prompt + helium_instructions)
|
| 206 |
|
| 207 |
|
| 208 |
if __name__ == "__main__":
|
| 209 |
+
main()
|
find_batman_mobile_agent.py
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
|
|
| 1 |
import math
|
|
|
|
|
|
|
| 2 |
from typing import Optional, Tuple
|
| 3 |
|
| 4 |
from smolagents import tool
|
|
@@ -28,8 +31,8 @@ def calculate_cargo_travel_time(
|
|
| 28 |
return degrees * (math.pi / 180.0)
|
| 29 |
|
| 30 |
#extract coordinates
|
| 31 |
-
lat1, lon1 = map(
|
| 32 |
-
lat2, lon2 = map(
|
| 33 |
|
| 34 |
# Earth Radius in kilometers
|
| 35 |
EARTH_RADIUS_KM = 6371.0
|
|
@@ -55,7 +58,9 @@ def calculate_cargo_travel_time(
|
|
| 55 |
# Format the results
|
| 56 |
return round(flight_time, 2)
|
| 57 |
|
| 58 |
-
print(
|
|
|
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
def check_reasoning_and_plot(final_answer, agent_memory):
|
|
|
|
| 1 |
+
import os
|
| 2 |
import math
|
| 3 |
+
from PIL import Image
|
| 4 |
+
from io import BytesIO
|
| 5 |
from typing import Optional, Tuple
|
| 6 |
|
| 7 |
from smolagents import tool
|
|
|
|
| 31 |
return degrees * (math.pi / 180.0)
|
| 32 |
|
| 33 |
#extract coordinates
|
| 34 |
+
lat1, lon1 = map(to_radians, origin_coords)
|
| 35 |
+
lat2, lon2 = map(to_radians, destination_coords)
|
| 36 |
|
| 37 |
# Earth Radius in kilometers
|
| 38 |
EARTH_RADIUS_KM = 6371.0
|
|
|
|
| 58 |
# Format the results
|
| 59 |
return round(flight_time, 2)
|
| 60 |
|
| 61 |
+
# Manual smoke test of calculate_cargo_travel_time. It runs only when this
# file is executed as a script, so importing the module (as mulitagents.py
# does) no longer computes and prints a travel time as a side effect.
if __name__ == "__main__":
    print(
        calculate_cargo_travel_time(origin_coords=(41.8781, -87.6298), destination_coords=(-33.8688, 151.2093))
    )
|
| 64 |
|
| 65 |
|
| 66 |
def check_reasoning_and_plot(final_answer, agent_memory):
|
image_agent.py
CHANGED
|
@@ -1,37 +1,41 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
import requests
|
| 3 |
from io import BytesIO
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
image_urls = [
|
| 6 |
-
"https://
|
| 7 |
-
"https://
|
| 8 |
]
|
| 9 |
|
| 10 |
-
|
| 11 |
|
| 12 |
for url in image_urls:
|
| 13 |
headers = {
|
| 14 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
|
| 15 |
}
|
| 16 |
response = requests.get(url, headers)
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
"""
|
| 32 |
-
Describe the costume and makeup that the comic characteer in these photos is wearing and return the description.
|
| 33 |
-
Tell me if the guest is the Joker or Wonder Women
|
| 34 |
-
""",
|
| 35 |
-
images = images
|
| 36 |
-
)
|
| 37 |
-
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import PIL
|
| 3 |
import requests
|
| 4 |
from io import BytesIO
|
| 5 |
+
from PIL import Image
|
| 6 |
+
from smolagents import CodeAgent, OpenAIServerModel, InferenceClientModel
|
| 7 |
+
from huggingface_hub import login
|
| 8 |
|
| 9 |
image_urls = [
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTcB9-ieIzm2uhtTkMkV5F50xbL4itvh3j8bg&s",  # Joker image
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR0SWQC74KuQWrJA33pJqUmOuDx62K7p1uEcQ&s",  # Joker image
]

# Browser-like User-Agent sent with every download request.
_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

# Images downloaded at import time; consumed by the stored-image agent.
stored_images = []

for url in image_urls:
    try:
        # headers must be passed by keyword: the second positional argument
        # of requests.get is ``params`` (the query string), so the
        # User-Agent was previously never sent as a header.
        response = requests.get(url, headers=_REQUEST_HEADERS, timeout=30)
    except requests.RequestException as exc:
        # A network failure for one image should not abort the import.
        print(f"⚠️ Could not download {url}: {exc}")
        continue

    try:
        image = Image.open(BytesIO(response.content)).convert("RGB")
        stored_images.append(image)
    except PIL.UnidentifiedImageError:
        print("⚠️ The downloaded file is not a valid image.")
        # Optional: save the raw bytes for inspection
        with open("debug_image.bin", "wb") as f:
            f.write(response.content)
|
| 30 |
|
| 31 |
|
| 32 |
+
def init_stored_image_agent() -> CodeAgent:
    """Create the agent used to judge the stored images.

    Returns:
        A ``CodeAgent`` without tools, backed by an ``OpenAIServerModel``
        for ``gpt-4o``, limited to 5 steps at verbosity level 2.
    """
    # The caller supplies the images and the question (Wonder Woman or Joker).
    return CodeAgent(
        tools=[],
        model=OpenAIServerModel(model_id="gpt-4o"),
        max_steps=5,
        verbosity_level=2,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
mulitagents.py
CHANGED
|
@@ -4,80 +4,66 @@ from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, Vi
|
|
| 4 |
|
| 5 |
from find_batman_mobile_agent import calculate_cargo_travel_time, check_reasoning_and_plot
|
| 6 |
|
| 7 |
-
example_model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together")
|
| 8 |
|
| 9 |
-
task = """Find all Batman filming locations in the world, calculate the time to transfer via cargo plane to here (we're in Gotham, 40.7128, 74.0060), and return them to me as a pandas dataframe.
|
| 10 |
-
Also give me some supercar factories with the same cargo plane transfer time.
|
| 11 |
-
"""
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
result = example_multi_agent.run(task)
|
| 21 |
-
|
| 22 |
-
example_multi_agent.planning_interval = 4
|
| 23 |
-
|
| 24 |
-
detailed_report = example_multi_agent.run(f"""
|
| 25 |
-
You're an expert analyst. You make comprehensive reports after visiting many websites.
|
| 26 |
-
Don't hesitate to search for many queries at once in a for loop.
|
| 27 |
-
For each data point that you find, visit the source url to confirm numbers.
|
| 28 |
-
""")
|
| 29 |
-
|
| 30 |
-
print(detailed_report)
|
| 31 |
-
|
| 32 |
-
example_web_model = InferenceClientModel(
|
| 33 |
-
"Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=8096
|
| 34 |
-
)
|
| 35 |
-
|
| 36 |
-
example_web_agent = CodeAgent(
|
| 37 |
-
model=example_web_model,
|
| 38 |
-
tools=[
|
| 39 |
-
DuckDuckGoSearchTool(),
|
| 40 |
-
VisitWebpageTool(),
|
| 41 |
-
calculate_cargo_travel_time(),
|
| 42 |
-
],
|
| 43 |
-
name = "web_agent",
|
| 44 |
-
description="Browses the web to find information",
|
| 45 |
-
verbosity_level=0,
|
| 46 |
-
max_steps=10
|
| 47 |
-
)
|
| 48 |
-
|
| 49 |
-
manager_agent = CodeAgent(
|
| 50 |
-
model=InferenceClientModel("deepseek-ai/DeepSeek-R1", provider="together", max_tokens=8096),
|
| 51 |
-
tools=[calculate_cargo_travel_time],
|
| 52 |
-
managed_agents=[example_web_agent],
|
| 53 |
-
additional_authorized_imports=[
|
| 54 |
-
"geopandas",
|
| 55 |
-
"plotly",
|
| 56 |
-
"shapely",
|
| 57 |
-
"json",
|
| 58 |
-
"pandas",
|
| 59 |
-
"numpy"
|
| 60 |
-
],
|
| 61 |
-
planning_interval=5,
|
| 62 |
-
verbosity_level=2,
|
| 63 |
-
final_answer_checks=[check_reasoning_and_plot],
|
| 64 |
-
max_steps=15
|
| 65 |
-
)
|
| 66 |
-
|
| 67 |
-
manager_agent.visualize()
|
| 68 |
-
|
| 69 |
-
manager_agent.run("""
|
| 70 |
-
Find all Batman filming locations in the world, calculate the time to transfer via cargo plane to here (we're in Gotham, 40.7128° N, 74.0060° W).
|
| 71 |
-
Also give me some supercar factories with the same cargo plane transfer time. You need at least 6 points in total.
|
| 72 |
-
Represent this as spatial map of the world, with the locations represented as scatter points with a color that depends on the travel time, and save it to saved_map.png!
|
| 73 |
-
|
| 74 |
-
Here's an example of how to plot and return a map:
|
| 75 |
-
import plotly.express as px
|
| 76 |
-
df = px.data.carshare()
|
| 77 |
-
fig = px.scatter_map(df, lat="centroid_lat", lon="centroid_lon", text="name", color="peak_hour", size=100
|
| 78 |
-
color_continous_scale=px.colors.sequential.Magma, size_max=15, zoom=1)
|
| 79 |
-
|
| 80 |
-
Never try to process strings using code: when you have a string to read, just print it and you'll see it.
|
| 81 |
-
""")
|
| 82 |
-
|
| 83 |
-
manager_agent.python_executor.state["fig"]
|
|
|
|
| 4 |
|
| 5 |
from find_batman_mobile_agent import calculate_cargo_travel_time, check_reasoning_and_plot
|
| 6 |
|
|
|
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
def define_multi_agent(run_examples: bool = False):
    """Build the manager agent that delegates web research to a sub-agent.

    Args:
        run_examples: When true, first execute the two single-agent tutorial
            prompts. They used to run on every call, before the manager was
            even built, although their results are never used by the
            manager; they are now opt-in.

    Returns:
        A ``CodeAgent`` manager (DeepSeek-R1 via the "together" provider)
        with ``calculate_cargo_travel_time`` as a tool, one managed
        ``web_agent``, and ``check_reasoning_and_plot`` as the final-answer
        check.
    """
    if run_examples:
        _run_single_agent_examples()

    example_web_model = InferenceClientModel(
        "Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=8096
    )

    # Sub-agent that searches and visits web pages on behalf of the manager.
    example_web_agent = CodeAgent(
        model=example_web_model,
        tools=[
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            calculate_cargo_travel_time,
        ],
        name="web_agent",
        description="Browses the web to find information",
        verbosity_level=0,
        max_steps=10,
    )

    manager_agent = CodeAgent(
        model=InferenceClientModel("deepseek-ai/DeepSeek-R1", provider="together", max_tokens=8096),
        tools=[calculate_cargo_travel_time],
        managed_agents=[example_web_agent],
        additional_authorized_imports=[
            "geopandas",
            "plotly",
            "shapely",
            "json",
            "pandas",
            "numpy",
        ],
        planning_interval=5,
        verbosity_level=2,
        final_answer_checks=[check_reasoning_and_plot],
        max_steps=15,
    )
    return manager_agent


def _run_single_agent_examples():
    """Run the two single-agent tutorial prompts and print the second report."""
    example_model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together")

    task = (
        "Find all Batman filming locations in the world, calculate the time to transfer via cargo plane to here (we're in Gotham, 40.7128, 74.0060), and return them to me as a pandas dataframe.\n"
        "Also give me some supercar factories with the same cargo plane transfer time.\n"
    )

    example_multi_agent = CodeAgent(
        model=example_model,
        tools=[DuckDuckGoSearchTool(), VisitWebpageTool(), calculate_cargo_travel_time],
        additional_authorized_imports=["pandas"],
        max_steps=20,
    )

    example_multi_agent.run(task)

    example_multi_agent.planning_interval = 4

    detailed_report = example_multi_agent.run(
        "\n"
        "You're an expert analyst. You make comprehensive reports after visiting many websites.\n"
        "Don't hesitate to search for many queries at once in a for loop.\n"
        "For each data point that you find, visit the source url to confirm numbers.\n"
    )

    print(detailed_report)
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
nsfw_detection.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PIL import Image
|
| 2 |
+
import requests
|
| 3 |
+
from io import BytesIO
|
| 4 |
+
import base64
|
| 5 |
+
import re
|
| 6 |
+
from transformers import pipeline
|
| 7 |
+
|
| 8 |
+
# Image-classification pipeline, created once when this module is imported
# and reused by classify_image_if_nsfw for every request.
classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def classify_image_if_nsfw(image_url: str):
    """Classify an image given as a regular URL or a base64 ``data:image`` URL.

    Args:
        image_url: Either a URL to download the image from, or a data URL of
            the form ``data:image/<type>;base64,<payload>``. Surrounding
            whitespace is ignored.

    Returns:
        Whatever the module-level ``classifier`` pipeline returns for the
        image.

    Raises:
        ValueError: If a ``data:image`` URL does not have the expected
            ``;base64,`` form.
        Exception: Any download, decoding or classification error is printed
            and re-raised unchanged.
    """
    try:
        image_url = image_url.strip()

        # Check if it's a base64 data URL
        if image_url.startswith('data:image'):
            print("Processing base64 data URL")

            # DOTALL lets the payload span line breaks (b64decode ignores
            # them); the subtype pattern also accepts names such as svg+xml.
            match = re.match(
                r'data:image/(?P<ext>[\w.+-]+);base64,(?P<data>.*)',
                image_url,
                re.DOTALL,
            )
            if not match:
                raise ValueError("Invalid base64 data URL format")

            # Decode the base64 data and open the image from it
            image_data = base64.b64decode(match.group('data'))
            img = Image.open(BytesIO(image_data))

        else:
            # It's a regular URL - download the image
            # NOTE(review): the URL comes straight from user input; consider
            # restricting the allowed hosts/schemes before fetching.
            print("Processing regular URL")
            response = requests.get(image_url, timeout=30)
            response.raise_for_status()

            # Open and process the image
            img = Image.open(BytesIO(response.content))

        print("Image size:", img.size)
        print("Image format:", img.format)
        print("Image mode:", img.mode)

        # Ensure image is in RGB mode (required by most models)
        if img.mode != 'RGB':
            img = img.convert('RGB')

        # Classify the image
        classifier_response = classifier(img)
        print("Classifier Response:", classifier_response)

        return classifier_response

    except Exception as e:
        print(f"Error processing image: {e}")
        raise
|
| 57 |
+
|
| 58 |
+
# Example usage with both types:
|
| 59 |
+
# Regular URL
|
| 60 |
+
# result1 = classify_image_if_nsfw("https://example.com/image.jpg")
|
| 61 |
+
|
| 62 |
+
# Base64 data URL (you would use an actual base64 string here)
|
| 63 |
+
# result2 = classify_image_if_nsfw("data:image/jpeg;base64,/9j/4AAQSkZJRgABAQ...")
|
requirements.txt
CHANGED
|
@@ -1,8 +1,15 @@
|
|
| 1 |
-
gradio
|
| 2 |
-
huggingface_hub
|
| 3 |
-
smolagents[all]
|
| 4 |
-
requests
|
| 5 |
-
pillow
|
| 6 |
-
selenium
|
| 7 |
-
helium
|
| 8 |
-
langchain-community
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio~=5.49.1
|
| 2 |
+
huggingface_hub~=1.0.1
|
| 3 |
+
smolagents[all]~=1.22.0
|
| 4 |
+
requests~=2.32.4
|
| 5 |
+
pillow~=11.3.0
|
| 6 |
+
selenium~=4.38.0
|
| 7 |
+
helium~=5.1.1
|
| 8 |
+
langchain-community~=0.3.27
|
| 9 |
+
litellm
|
| 10 |
+
transformers~=4.53.2
|
| 11 |
+
python-dotenv~=1.1.1
|
| 12 |
+
plotly
|
| 13 |
+
shapely
|
| 14 |
+
geopandas
|
| 15 |
+
markdownify
|