BrightData committed on
Commit
b78b32a
·
verified ·
1 Parent(s): bf496eb

Add Bright Data Search Tool

Browse files
Files changed (3) hide show
  1. app.py +5 -0
  2. requirements.txt +2 -0
  3. tool.py +83 -0
app.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from smolagents import launch_gradio_demo
2
+ from tool import BrightDataSearchTool
3
+
4
# Build the Bright Data search tool once at import time and hand it to the
# smolagents helper that serves a tool through a Gradio demo UI.
tool = BrightDataSearchTool()
launch_gradio_demo(tool)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ requests
2
+ smolagents
tool.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents.tools import Tool
3
+ import json
4
+ import requests
5
+ import os
6
+
7
class BrightDataSearchTool(Tool):
    """smolagents tool that runs a web search through Bright Data's request API.

    The search-engine results page URL is built locally and fetched through
    ``https://api.brightdata.com/request`` using the zone named by the
    ``BRIGHT_DATA_UNLOCKER_ZONE`` environment variable. Google results are
    requested as JSON and trimmed to a few keys; Bing and Yandex results are
    returned as the markdown text the API sends back.
    """

    name = "brightdata_search_engine"
    description = """
    Search Google, Bing, or Yandex and get structured results.
    Returns search results with URLs, titles, and descriptions.
    Ideal for gathering current information and news.
    """
    inputs = {
        'query': {'type': 'string', 'description': 'The search query'},
        'engine': {
            'type': 'string',
            'description': "Search engine to use: 'google', 'bing', or 'yandex'. Default is 'google'",
            'nullable': True,
            'default': 'google',
        },
    }
    output_type = "string"

    def forward(self, query: str, engine: str = "google") -> str:
        """
        Search using Bright Data's search API.

        Args:
            query: The search query.
            engine: Search engine to use (google, bing, or yandex). ``None``,
                an empty string, or an unrecognised name is treated as
                ``"google"``.

        Returns:
            For Google: a JSON string with the keys ``organic``, ``images``,
            ``related`` and ``ai_overview``. For Bing/Yandex: the response
            body as returned by the API (markdown is requested). On a request
            failure or an unparsable response: a JSON string of the form
            ``{"error": "..."}``.

        Raises:
            ValueError: If ``BRIGHT_DATA_API_TOKEN` is not set in the environment.
        """
        # Imports are kept local to the method, as in the original.
        # NOTE(review): presumably required so the tool stays self-contained
        # when exported by smolagents - confirm before hoisting them.
        import os
        import json
        import requests

        api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
        unlocker_zone = os.getenv("BRIGHT_DATA_UNLOCKER_ZONE", "web_unlocker1")

        if not api_token:
            raise ValueError("BRIGHT_DATA_API_TOKEN not found in environment variables")

        encoded_query = requests.utils.quote(query)
        search_urls = {
            "google": f"https://www.google.com/search?q={encoded_query}&brd_json=1",
            "bing": f"https://www.bing.com/search?q={encoded_query}",
            "yandex": f"https://yandex.com/search/?text={encoded_query}",
        }

        # `engine` is declared nullable in `inputs`, so it may arrive as None.
        # Normalise it once and derive BOTH the URL and the parsing mode from
        # the same key, so an unknown engine that falls back to the Google URL
        # is also parsed as Google JSON instead of being requested as markdown.
        engine_key = (engine or "google").strip().lower()
        if engine_key not in search_urls:
            engine_key = "google"
        search_url = search_urls[engine_key]
        is_google = engine_key == "google"

        api_url = "https://api.brightdata.com/request"
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }

        payload = {
            "url": search_url,
            "zone": unlocker_zone,
            "format": "raw",
        }

        if not is_google:
            payload["data_format"] = "markdown"

        try:
            # Without a timeout requests waits forever on a stalled connection.
            # (connect, read) seconds; the read budget is deliberately generous.
            response = requests.post(
                api_url, json=payload, headers=headers, timeout=(10, 120)
            )
            response.raise_for_status()

            if is_google:
                data = response.json()
                if not isinstance(data, dict):
                    return json.dumps(
                        {"error": "Unexpected response format from search API"}
                    )
                results = {
                    "organic": data.get("organic", []),
                    # `or []` also covers an explicit null value for "images".
                    "images": [
                        img.get("link")
                        for img in (data.get("images") or [])
                        if isinstance(img, dict)
                    ],
                    "related": data.get("related", []),
                    "ai_overview": data.get("ai_overview"),
                }
                return json.dumps(results, indent=2)

            # Return markdown for Bing/Yandex
            return response.text

        except requests.exceptions.RequestException as e:
            return json.dumps({"error": str(e)})
        except ValueError as e:
            # Older requests versions raise a plain ValueError from
            # response.json() on a non-JSON body; report it like other failures.
            return json.dumps({"error": f"Invalid JSON in search response: {e}"})

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments; only mark the tool as not yet initialized.

        NOTE(review): this does not call ``Tool.__init__`` - confirm the base
        class needs no further setup.
        """
        self.is_initialized = False