BrightData's picture
Add Bright Data Search Tool
d624fb3 verified
raw
history blame
4.11 kB
from typing import Any, Optional
from smolagents.tools import Tool
import os
import json
import requests
class BrightDataSearchTool(Tool):
    """Tool that runs a web search through Bright Data's request API.

    The search-engine results page for the query is fetched via
    ``https://api.brightdata.com/request``. Google results are requested as
    JSON (``brd_json=1``) and reduced to a few fields; Bing and Yandex results
    are requested as markdown and returned as-is.

    Environment variables read by ``forward``:
        BRIGHT_DATA_API_TOKEN: bearer token for the API (required).
        BRIGHT_DATA_UNLOCKER_ZONE: zone name, defaults to ``web_unlocker1``.
    """

    name = "brightdata_search_engine"
    # Same text as the original triple-quoted literal (leading and trailing
    # newline included), spelled out so source indentation cannot leak into it.
    description = (
        "\n"
        "Search Google, Bing, or Yandex and get structured results.\n"
        "Returns search results with URLs, titles, and descriptions.\n"
        "Ideal for gathering current information and news.\n"
    )
    output_type = "string"

    def __init__(self):
        # Build inputs at init to satisfy smolagents validation rules.
        self.inputs = {
            "query": {
                "type": "string",
                "description": "The search query",
            },
            "engine": {
                "type": "string",
                "description": "Search engine to use",
                "enum": ["google", "bing", "yandex"],
                "nullable": True,
                "default": "google",
            },
        }
        super().__init__()

    def forward(self, query: str, engine: str = "google") -> str:
        """
        Search using Bright Data's search API.

        Args:
            query: The search query.
            engine: Search engine to use (google, bing, or yandex). ``None``
                or an unrecognised value is treated as ``google``.

        Returns:
            For Google: a JSON string with the keys ``organic``, ``images``,
            ``related`` and ``ai_overview``. For Bing/Yandex: the markdown
            body returned by the API. On a request failure or an unusable
            Google payload: a JSON string of the form ``{"error": "..."}``.

        Raises:
            ValueError: if ``BRIGHT_DATA_API_TOKEN`` is not set.
        """
        # Imports are repeated locally, as in the original method.
        # NOTE(review): presumably this keeps the method self-contained for
        # smolagents export/validation -- confirm before hoisting them.
        import os
        import json
        import requests

        api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
        unlocker_zone = os.getenv("BRIGHT_DATA_UNLOCKER_ZONE", "web_unlocker1")
        if not api_token:
            raise ValueError("BRIGHT_DATA_API_TOKEN not found in environment variables")

        encoded_query = requests.utils.quote(query)
        search_urls = {
            "google": f"https://www.google.com/search?q={encoded_query}&brd_json=1",
            "bing": f"https://www.bing.com/search?q={encoded_query}",
            "yandex": f"https://yandex.com/search/?text={encoded_query}",
        }

        # `engine` is declared nullable in `inputs`, so None must behave like
        # the default instead of crashing on `.lower()`.
        engine_key = (engine or "google").strip().lower()
        if engine_key not in search_urls:
            # Unknown engines fall back to Google *consistently*: the URL, the
            # requested format and the response parsing all follow the same
            # choice (previously the Google URL was paired with markdown).
            engine_key = "google"
        is_google = engine_key == "google"

        api_url = "https://api.brightdata.com/request"
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }
        payload = {
            "url": search_urls[engine_key],
            "zone": unlocker_zone,
            "format": "raw",
        }
        if not is_google:
            payload["data_format"] = "markdown"

        try:
            # (connect, read) timeouts: without them a stalled connection
            # would block the calling agent indefinitely. A timeout surfaces
            # as a RequestException and is reported like any other failure.
            response = requests.post(
                api_url,
                json=payload,
                headers=headers,
                timeout=(10, 120),
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return json.dumps({"error": str(e)})

        if not is_google:
            # Return markdown for Bing/Yandex
            return response.text

        try:
            data = response.json()
        except ValueError as e:
            # Covers json.JSONDecodeError and requests' JSONDecodeError; older
            # requests versions raise a plain ValueError that the
            # RequestException handler above would not catch.
            return json.dumps({"error": f"Invalid JSON in search response: {e}"})
        if not isinstance(data, dict):
            return json.dumps({"error": "Unexpected search response format"})

        # `or []` also covers keys that are present but null.
        results = {
            "organic": data.get("organic") or [],
            "images": [
                img.get("link")
                for img in (data.get("images") or [])
                if isinstance(img, dict)
            ],
            "related": data.get("related") or [],
            "ai_overview": data.get("ai_overview"),
        }
        return json.dumps(results, indent=2)

    def _get_gradio_app_code(self, tool_module_name: str = "tool") -> str:
        """
        Custom Gradio UI: dropdown for engine, textbox for query.
        Uses importlib to avoid adding a fake 'tool' requirement.

        Args:
            tool_module_name: Name of the module the generated app imports
                ``BrightDataSearchTool`` from. The default ``"tool"`` yields
                the same import target as the previously hard-coded value.

        Returns:
            Source code of a standalone Gradio app, as a string.
        """
        choices = ["google", "bing", "yandex"]
        # Built line by line so the generated program's own indentation is
        # explicit and independent of how this method is indented.
        app_lines = [
            "import gradio as gr",
            "import importlib",
            f'BrightDataSearchTool = importlib.import_module("{tool_module_name}").BrightDataSearchTool',
            "tool = BrightDataSearchTool()",
            "def run(query, engine):",
            "    return tool(query=query, engine=engine)",
            "with gr.Blocks() as demo:",
            '    gr.Markdown("### Bright Data search")',
            '    query = gr.Textbox(label="Query")',
            f'    engine = gr.Dropdown(choices={choices}, label="Engine", value="google")',
            '    run_btn = gr.Button("Run")',
            '    output = gr.Textbox(label="Output", lines=12)',
            "    run_btn.click(run, inputs=[query, engine], outputs=output)",
            "demo.launch()",
        ]
        return "\n".join(app_lines) + "\n"