meirk-brd committed on
Commit
bcba5ba
·
1 Parent(s): 86503c7
Files changed (2) hide show
  1. app.py +41 -33
  2. tool.py +166 -168
app.py CHANGED
@@ -1,20 +1,23 @@
 
 
1
  import gradio as gr
2
- import importlib
3
 
4
- BrightDataDatasetTool = importlib.import_module("tool").BrightDataDatasetTool
 
5
  tool = BrightDataDatasetTool()
6
 
7
- DATASET_FIELDS = {'amazon_product': ['url'], 'amazon_product_reviews': ['url'], 'amazon_product_search': ['keyword', 'url'], 'walmart_product': ['url'], 'walmart_seller': ['url'], 'ebay_product': ['url'], 'homedepot_products': ['url'], 'zara_products': ['url'], 'etsy_products': ['url'], 'bestbuy_products': ['url'], 'linkedin_person_profile': ['url'], 'linkedin_company_profile': ['url'], 'linkedin_job_listings': ['url'], 'linkedin_posts': ['url'], 'linkedin_people_search': ['url', 'first_name', 'last_name'], 'crunchbase_company': ['url'], 'zoominfo_company_profile': ['url'], 'instagram_profiles': ['url'], 'instagram_posts': ['url'], 'instagram_reels': ['url'], 'instagram_comments': ['url'], 'facebook_posts': ['url'], 'facebook_marketplace_listings': ['url'], 'facebook_company_reviews': ['url', 'num_of_reviews'], 'facebook_events': ['url'], 'tiktok_profiles': ['url'], 'tiktok_posts': ['url'], 'tiktok_shop': ['url'], 'tiktok_comments': ['url'], 'google_maps_reviews': ['url', 'days_limit'], 'google_shopping': ['url'], 'google_play_store': ['url'], 'apple_app_store': ['url'], 'reuter_news': ['url'], 'github_repository_file': ['url'], 'yahoo_finance_business': ['url'], 'x_posts': ['url'], 'zillow_properties_listing': ['url'], 'booking_hotel_listings': ['url'], 'youtube_profiles': ['url'], 'youtube_comments': ['url', 'num_of_comments'], 'reddit_posts': ['url'], 'youtube_videos': ['url']}
8
- CHOICES = ['amazon_product', 'amazon_product_reviews', 'amazon_product_search', 'apple_app_store', 'bestbuy_products', 'booking_hotel_listings', 'crunchbase_company', 'ebay_product', 'etsy_products', 'facebook_company_reviews', 'facebook_events', 'facebook_marketplace_listings', 'facebook_posts', 'github_repository_file', 'google_maps_reviews', 'google_play_store', 'google_shopping', 'homedepot_products', 'instagram_comments', 'instagram_posts', 'instagram_profiles', 'instagram_reels', 'linkedin_company_profile', 'linkedin_job_listings', 'linkedin_people_search', 'linkedin_person_profile', 'linkedin_posts', 'reddit_posts', 'reuter_news', 'tiktok_comments', 'tiktok_posts', 'tiktok_profiles', 'tiktok_shop', 'walmart_product', 'walmart_seller', 'x_posts', 'yahoo_finance_business', 'youtube_comments', 'youtube_profiles', 'youtube_videos', 'zara_products', 'zillow_properties_listing', 'zoominfo_company_profile']
9
 
10
- def toggle_fields(selected):
11
  inputs = ["url", "keyword", "first_name", "last_name", "days_limit", "num_of_reviews", "num_of_comments"]
12
  wanted = set(DATASET_FIELDS.get(selected, []))
13
- def vis(name):
 
14
  return gr.update(visible=name in wanted)
15
- return tuple(vis(n) for n in inputs)
16
 
17
- def run(dataset, url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments):
 
 
 
18
  return tool(
19
  dataset=dataset,
20
  url=url,
@@ -26,29 +29,34 @@ def run(dataset, url, keyword, first_name, last_name, days_limit, num_of_reviews
26
  num_of_comments=num_of_comments,
27
  )
28
 
29
- with gr.Blocks() as demo:
30
- gr.Markdown("### Bright Data dataset fetch")
31
- dataset = gr.Dropdown(choices=CHOICES, label="Dataset", multiselect=False, value=CHOICES[0])
32
- url = gr.Textbox(label="URL", placeholder="https://...", visible=True)
33
- keyword = gr.Textbox(label="Keyword", visible=False)
34
- first_name = gr.Textbox(label="First name", visible=False)
35
- last_name = gr.Textbox(label="Last name", visible=False)
36
- days_limit = gr.Textbox(label="Days limit (e.g. 3)", visible=False)
37
- num_of_reviews = gr.Textbox(label="Number of reviews", visible=False)
38
- num_of_comments = gr.Textbox(label="Number of comments", visible=False)
39
-
40
- dataset.change(
41
- toggle_fields,
42
- inputs=[dataset],
43
- outputs=[url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments],
44
- )
45
-
46
- run_btn = gr.Button("Run")
47
- output = gr.Textbox(label="Output", lines=12)
48
- run_btn.click(
49
- run,
50
- inputs=[dataset, url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments],
51
- outputs=output,
52
- )
53
 
54
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
  import gradio as gr
 
4
 
5
+ from tool import BrightDataDatasetTool, DATASET_CHOICES, DATASET_FIELDS
6
+
7
  tool = BrightDataDatasetTool()
8
 
 
 
9
 
10
+ def toggle_fields(selected: str):
11
  inputs = ["url", "keyword", "first_name", "last_name", "days_limit", "num_of_reviews", "num_of_comments"]
12
  wanted = set(DATASET_FIELDS.get(selected, []))
13
+
14
+ def visibility(name: str):
15
  return gr.update(visible=name in wanted)
 
16
 
17
+ return tuple(visibility(name) for name in inputs)
18
+
19
+
20
+ def run(dataset: str, url: str, keyword: str, first_name: str, last_name: str, days_limit: str, num_of_reviews: str, num_of_comments: str) -> str:
21
  return tool(
22
  dataset=dataset,
23
  url=url,
 
29
  num_of_comments=num_of_comments,
30
  )
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ def create_demo() -> gr.Blocks:
34
+ with gr.Blocks() as demo:
35
+ gr.Markdown("### Bright Data dataset fetch")
36
+ dataset = gr.Dropdown(choices=DATASET_CHOICES, label="Dataset", value=DATASET_CHOICES[0])
37
+ url = gr.Textbox(label="URL", placeholder="https://...", visible=True)
38
+ keyword = gr.Textbox(label="Keyword", visible=False)
39
+ first_name = gr.Textbox(label="First name", visible=False)
40
+ last_name = gr.Textbox(label="Last name", visible=False)
41
+ days_limit = gr.Textbox(label="Days limit (e.g. 3)", visible=False)
42
+ num_of_reviews = gr.Textbox(label="Number of reviews", visible=False)
43
+ num_of_comments = gr.Textbox(label="Number of comments", visible=False)
44
+
45
+ dataset.change(
46
+ toggle_fields,
47
+ inputs=[dataset],
48
+ outputs=[url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments],
49
+ )
50
+
51
+ run_btn = gr.Button("Run")
52
+ output = gr.Textbox(label="Output", lines=12)
53
+ run_btn.click(
54
+ run,
55
+ inputs=[dataset, url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments],
56
+ outputs=output,
57
+ )
58
+ return demo
59
+
60
+
61
+ if __name__ == "__main__":
62
+ create_demo().launch()
tool.py CHANGED
@@ -1,118 +1,83 @@
1
- from typing import Any, Optional
2
- from smolagents.tools import Tool
3
  import json
4
- import time
5
  import os
 
 
 
6
  import requests
 
 
 
 
 
 
 
 
 
7
 
8
  class BrightDataDatasetTool(Tool):
9
  name = "brightdata_dataset_fetch"
10
- description = "Trigger a Bright Data dataset collection and poll until the snapshot is ready. Choose a dataset key (e.g., amazon_product, linkedin_company_profile, google_maps_reviews). For most datasets, you only need to provide the URL parameter. For example: brightdata_dataset_fetch(dataset='linkedin_person_profile', url='https://linkedin.com/in/...')"
11
  output_type = "string"
12
 
13
- def __init__(self):
14
- # Keep dataset catalogue on the instance and build the inputs schema dynamically to satisfy tool validation.
15
- self.datasets = globals().get("DATASETS")
16
- if not self.datasets:
17
- import json
18
- fallback_json = r'{"amazon_product": {"dataset_id": "gd_l7q7dkf244hwjntr0", "description": "Quickly read structured amazon product data.\nRequires a valid product URL with /dp/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "amazon_product_reviews": {"dataset_id": "gd_le8e811kzy4ggddlq", "description": "Quickly read structured amazon product review data.\nRequires a valid product URL with /dp/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "amazon_product_search": {"dataset_id": "gd_lwdb4vjm1ehb499uxs", "description": "Quickly read structured amazon product search data.\nRequires a valid search keyword and amazon domain URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["keyword", "url"], "fixed_values": {"pages_to_search": "1"}}, "walmart_product": {"dataset_id": "gd_l95fol7l1ru6rlo116", "description": "Quickly read structured walmart product data.\nRequires a valid product URL with /ip/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "walmart_seller": {"dataset_id": "gd_m7ke48w81ocyu4hhz0", "description": "Quickly read structured walmart seller data.\nRequires a valid walmart seller URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "ebay_product": {"dataset_id": "gd_ltr9mjt81n0zzdk1fb", "description": "Quickly read structured ebay product data.\nRequires a valid ebay product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "homedepot_products": {"dataset_id": "gd_lmusivh019i7g97q2n", "description": "Quickly read structured homedepot product data.\nRequires a valid homedepot product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "zara_products": {"dataset_id": "gd_lct4vafw1tgx27d4o0", "description": "Quickly read structured 
zara product data.\nRequires a valid zara product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "etsy_products": {"dataset_id": "gd_ltppk0jdv1jqz25mz", "description": "Quickly read structured etsy product data.\nRequires a valid etsy product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "bestbuy_products": {"dataset_id": "gd_ltre1jqe1jfr7cccf", "description": "Quickly read structured bestbuy product data.\nRequires a valid bestbuy product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_person_profile": {"dataset_id": "gd_l1viktl72bvl7bjuj0", "description": "Quickly read structured linkedin people profile data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_company_profile": {"dataset_id": "gd_l1vikfnt1wgvvqz95w", "description": "Quickly read structured linkedin company profile data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_job_listings": {"dataset_id": "gd_lpfll7v5hcqtkxl6l", "description": "Quickly read structured linkedin job listings data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_posts": {"dataset_id": "gd_lyy3tktm25m4avu764", "description": "Quickly read structured linkedin posts data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_people_search": {"dataset_id": "gd_m8d03he47z8nwb5xc", "description": "Quickly read structured linkedin people search data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url", "first_name", "last_name"]}, "crunchbase_company": {"dataset_id": "gd_l1vijqt9jfj7olije", "description": "Quickly read structured crunchbase company data.\nThis can be a cache lookup, so it can be more reliable than scraping.", 
"inputs": ["url"]}, "zoominfo_company_profile": {"dataset_id": "gd_m0ci4a4ivx3j5l6nx", "description": "Quickly read structured ZoomInfo company profile data.\nRequires a valid ZoomInfo company URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "instagram_profiles": {"dataset_id": "gd_l1vikfch901nx3by4", "description": "Quickly read structured Instagram profile data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "instagram_posts": {"dataset_id": "gd_lk5ns7kz21pck8jpis", "description": "Quickly read structured Instagram post data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "instagram_reels": {"dataset_id": "gd_lyclm20il4r5helnj", "description": "Quickly read structured Instagram reel data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "instagram_comments": {"dataset_id": "gd_ltppn085pokosxh13", "description": "Quickly read structured Instagram comments data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "facebook_posts": {"dataset_id": "gd_lyclm1571iy3mv57zw", "description": "Quickly read structured Facebook post data.\nRequires a valid Facebook post URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "facebook_marketplace_listings": {"dataset_id": "gd_lvt9iwuh6fbcwmx1a", "description": "Quickly read structured Facebook marketplace listing data.\nRequires a valid Facebook marketplace listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "facebook_company_reviews": {"dataset_id": "gd_m0dtqpiu1mbcyc2g86", "description": "Quickly read structured Facebook company reviews data.\nRequires a valid Facebook company 
URL and number of reviews.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url", "num_of_reviews"]}, "facebook_events": {"dataset_id": "gd_m14sd0to1jz48ppm51", "description": "Quickly read structured Facebook events data.\nRequires a valid Facebook event URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "tiktok_profiles": {"dataset_id": "gd_l1villgoiiidt09ci", "description": "Quickly read structured Tiktok profiles data.\nRequires a valid Tiktok profile URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "tiktok_posts": {"dataset_id": "gd_lu702nij2f790tmv9h", "description": "Quickly read structured Tiktok post data.\nRequires a valid Tiktok post URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "tiktok_shop": {"dataset_id": "gd_m45m1u911dsa4274pi", "description": "Quickly read structured Tiktok shop data.\nRequires a valid Tiktok shop product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "tiktok_comments": {"dataset_id": "gd_lkf2st302ap89utw5k", "description": "Quickly read structured Tiktok comments data.\nRequires a valid Tiktok video URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "google_maps_reviews": {"dataset_id": "gd_luzfs1dn2oa0teb81", "description": "Quickly read structured Google maps reviews data.\nRequires a valid Google maps URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url", "days_limit"], "defaults": {"days_limit": "3"}}, "google_shopping": {"dataset_id": "gd_ltppk50q18kdw67omz", "description": "Quickly read structured Google shopping data.\nRequires a valid Google shopping product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "google_play_store": {"dataset_id": 
"gd_lsk382l8xei8vzm4u", "description": "Quickly read structured Google play store data.\nRequires a valid Google play store app URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "apple_app_store": {"dataset_id": "gd_lsk9ki3u2iishmwrui", "description": "Quickly read structured apple app store data.\nRequires a valid apple app store app URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "reuter_news": {"dataset_id": "gd_lyptx9h74wtlvpnfu", "description": "Quickly read structured reuter news data.\nRequires a valid reuter news report URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "github_repository_file": {"dataset_id": "gd_lyrexgxc24b3d4imjt", "description": "Quickly read structured github repository data.\nRequires a valid github repository file URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "yahoo_finance_business": {"dataset_id": "gd_lmrpz3vxmz972ghd7", "description": "Quickly read structured yahoo finance business data.\nRequires a valid yahoo finance business URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "x_posts": {"dataset_id": "gd_lwxkxvnf1cynvib9co", "description": "Quickly read structured X post data.\nRequires a valid X post URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "zillow_properties_listing": {"dataset_id": "gd_lfqkr8wm13ixtbd8f5", "description": "Quickly read structured zillow properties listing data.\nRequires a valid zillow properties listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "booking_hotel_listings": {"dataset_id": "gd_m5mbdl081229ln6t4a", "description": "Quickly read structured booking hotel listings data.\nRequires a valid booking hotel listing URL.\nThis can be a cache lookup, so it 
can be more reliable than scraping.", "inputs": ["url"]}, "youtube_profiles": {"dataset_id": "gd_lk538t2k2p1k3oos71", "description": "Quickly read structured youtube profiles data.\nRequires a valid youtube profile URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "youtube_comments": {"dataset_id": "gd_lk9q0ew71spt1mxywf", "description": "Quickly read structured youtube comments data.\nRequires a valid youtube video URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url", "num_of_comments"], "defaults": {"num_of_comments": "10"}}, "reddit_posts": {"dataset_id": "gd_lvz8ah06191smkebj4", "description": "Quickly read structured reddit posts data.\nRequires a valid reddit post URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "youtube_videos": {"dataset_id": "gd_lk56epmy2i5g7lzu0k", "description": "Quickly read structured YouTube videos data.\nRequires a valid YouTube video URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}}'
19
- self.datasets = json.loads(fallback_json)
20
  self.inputs = {
21
  "dataset": {
22
  "type": "string",
23
  "description": "Dataset key",
24
- # Provide choices so UI renders a dropdown instead of a long list.
25
  "enum": sorted(self.datasets.keys()),
26
  },
27
  "url": {
28
  "type": "string",
29
- "description": "URL for the dataset (required for most datasets)",
30
  "nullable": True,
31
  },
32
  "keyword": {
33
  "type": "string",
34
- "description": "Search keyword (for search datasets like amazon_product_search)",
35
  "nullable": True,
36
  },
37
  "first_name": {
38
  "type": "string",
39
- "description": "First name (for datasets like linkedin_people_search)",
40
  "nullable": True,
41
  },
42
  "last_name": {
43
  "type": "string",
44
- "description": "Last name (for datasets like linkedin_people_search)",
45
  "nullable": True,
46
  },
47
  "days_limit": {
48
  "type": "string",
49
- "description": "Days limit (for datasets like google_maps_reviews, default: 3)",
50
  "nullable": True,
51
  },
52
  "num_of_reviews": {
53
  "type": "string",
54
- "description": "Number of reviews (for datasets like facebook_company_reviews)",
55
  "nullable": True,
56
  },
57
  "num_of_comments": {
58
  "type": "string",
59
- "description": "Number of comments (for datasets like youtube_comments, default: 10)",
60
  "nullable": True,
61
  },
62
  }
63
  super().__init__()
64
 
65
- def _prepare_payload(self, dataset_key: str, params):
66
- """Validate required fields, apply defaults, and merge fixed values."""
67
- config = self.datasets[dataset_key]
68
- payload = {}
69
-
70
- defaults = config.get("defaults", {})
71
- fixed_values = config.get("fixed_values", {})
72
-
73
- for field in config["inputs"]:
74
- if field in params:
75
- payload[field] = params[field]
76
- elif field in defaults:
77
- payload[field] = defaults[field]
78
- else:
79
- raise ValueError(f"Missing required field '{field}' for dataset '{dataset_key}'")
80
-
81
- payload.update(fixed_values)
82
- return payload
83
-
84
  def forward(
85
  self,
86
  dataset: str,
87
- url: str = None,
88
- keyword: str = None,
89
- first_name: str = None,
90
- last_name: str = None,
91
- days_limit: str = None,
92
- num_of_reviews: str = None,
93
- num_of_comments: str = None,
94
  ) -> str:
95
- """
96
- Trigger a dataset run and poll until results are ready.
97
-
98
- Args:
99
- dataset: The dataset key from DATASETS.
100
- url: URL for the dataset (required for most datasets).
101
- keyword: Search keyword (for search datasets).
102
- first_name: First name (for people search datasets).
103
- last_name: Last name (for people search datasets).
104
- days_limit: Days limit (for time-based datasets).
105
- num_of_reviews: Number of reviews to fetch.
106
- num_of_comments: Number of comments to fetch.
107
-
108
- Returns:
109
- JSON string of the snapshot data once ready.
110
- """
111
- import os
112
- import json
113
- import time
114
- import requests
115
-
116
  api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
117
  if not api_token:
118
  raise ValueError("BRIGHT_DATA_API_TOKEN not found in environment variables")
@@ -120,8 +85,36 @@ class BrightDataDatasetTool(Tool):
120
  if dataset not in self.datasets:
121
  raise ValueError(f"Unknown dataset '{dataset}'. Valid options: {', '.join(sorted(self.datasets.keys()))}")
122
 
123
- # Build params dict from provided arguments
124
- params = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  if url is not None:
126
  params["url"] = url
127
  if keyword is not None:
@@ -136,124 +129,129 @@ class BrightDataDatasetTool(Tool):
136
  params["num_of_reviews"] = num_of_reviews
137
  if num_of_comments is not None:
138
  params["num_of_comments"] = num_of_comments
 
139
 
140
- payload = self._prepare_payload(dataset, params)
141
- dataset_id = self.datasets[dataset]["dataset_id"]
 
142
 
143
- trigger_url = "https://api.brightdata.com/datasets/v3/trigger"
144
- trigger_headers = {
145
- "Authorization": f"Bearer {api_token}",
146
- "Content-Type": "application/json",
147
- }
 
 
 
 
 
 
 
 
148
 
149
- trigger_response = requests.post(
 
 
 
150
  trigger_url,
151
  params={"dataset_id": dataset_id, "include_errors": "true"},
152
  json=[payload],
153
- headers=trigger_headers,
 
 
 
154
  timeout=60,
155
  )
156
- trigger_response.raise_for_status()
157
- snapshot_id = trigger_response.json().get("snapshot_id")
158
-
159
  if not snapshot_id:
160
  raise RuntimeError("No snapshot ID returned from Bright Data.")
 
161
 
 
162
  snapshot_url = f"https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id}"
163
  max_attempts = 600
164
  attempts = 0
165
 
166
  while attempts < max_attempts:
167
- try:
168
- response = requests.get(
169
- snapshot_url,
170
- params={"format": "json"},
171
- headers={"Authorization": f"Bearer {api_token}"},
172
- timeout=30,
173
- )
174
-
175
- if response.status_code == 400:
176
- response.raise_for_status()
177
-
178
- data = response.json()
179
- if isinstance(data, list):
180
- return json.dumps(data, indent=2)
181
-
182
- status = data.get("status") if isinstance(data, dict) else None
183
- if status not in {"running", "building"}:
184
- return json.dumps(data, indent=2)
185
-
186
- attempts += 1
187
- time.sleep(1)
188
-
189
- except requests.exceptions.RequestException as exc:
190
- if getattr(getattr(exc, "response", None), "status_code", None) == 400:
191
- raise
192
- attempts += 1
193
- time.sleep(1)
194
 
195
- raise TimeoutError(f"Timeout waiting for snapshot {snapshot_id} after {max_attempts} seconds")
 
196
 
197
- def _get_gradio_app_code(self, tool_module_name: str = "tool") -> str:
198
- """
199
- Override the default app to render a dropdown for dataset selection
200
- instead of a long text field. Kept minimal: single-select dropdown,
201
- and shows only relevant parameter fields for the chosen dataset.
202
- """
203
- choices = sorted(self.datasets.keys())
204
- dataset_fields = {k: v["inputs"] for k, v in self.datasets.items()}
205
- return f"""import gradio as gr
206
- import importlib
207
-
208
- BrightDataDatasetTool = importlib.import_module("tool").BrightDataDatasetTool
209
- tool = BrightDataDatasetTool()
210
-
211
- DATASET_FIELDS = {dataset_fields}
212
- CHOICES = {choices}
213
-
214
- def toggle_fields(selected):
215
- inputs = ["url", "keyword", "first_name", "last_name", "days_limit", "num_of_reviews", "num_of_comments"]
216
- wanted = set(DATASET_FIELDS.get(selected, []))
217
- def vis(name):
218
- return gr.update(visible=name in wanted)
219
- return tuple(vis(n) for n in inputs)
220
-
221
- def run(dataset, url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments):
222
- return tool(
223
- dataset=dataset,
224
- url=url,
225
- keyword=keyword,
226
- first_name=first_name,
227
- last_name=last_name,
228
- days_limit=days_limit,
229
- num_of_reviews=num_of_reviews,
230
- num_of_comments=num_of_comments,
231
- )
232
 
233
- with gr.Blocks() as demo:
234
- gr.Markdown("### Bright Data dataset fetch")
235
- dataset = gr.Dropdown(choices=CHOICES, label="Dataset", multiselect=False, value=CHOICES[0])
236
- url = gr.Textbox(label="URL", placeholder="https://...", visible=True)
237
- keyword = gr.Textbox(label="Keyword", visible=False)
238
- first_name = gr.Textbox(label="First name", visible=False)
239
- last_name = gr.Textbox(label="Last name", visible=False)
240
- days_limit = gr.Textbox(label="Days limit (e.g. 3)", visible=False)
241
- num_of_reviews = gr.Textbox(label="Number of reviews", visible=False)
242
- num_of_comments = gr.Textbox(label="Number of comments", visible=False)
243
-
244
- dataset.change(
245
- toggle_fields,
246
- inputs=[dataset],
247
- outputs=[url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments],
248
- )
249
 
250
- run_btn = gr.Button("Run")
251
- output = gr.Textbox(label="Output", lines=12)
252
- run_btn.click(
253
- run,
254
- inputs=[dataset, url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments],
255
- outputs=output,
256
- )
257
 
258
- demo.launch()
259
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
  import json
 
4
  import os
5
+ import time
6
+ from typing import Any, Dict, List, Optional
7
+
8
  import requests
9
+ from smolagents.tools import Tool
10
+
11
+
12
+ DATASETS_JSON = r'''{"amazon_product": {"dataset_id": "gd_l7q7dkf244hwjntr0", "description": "Quickly read structured amazon product data.\nRequires a valid product URL with /dp/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "amazon_product_reviews": {"dataset_id": "gd_le8e811kzy4ggddlq", "description": "Quickly read structured amazon product review data.\nRequires a valid product URL with /dp/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "amazon_product_search": {"dataset_id": "gd_lwdb4vjm1ehb499uxs", "description": "Quickly read structured amazon product search data.\nRequires a valid search keyword and amazon domain URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["keyword", "url"], "fixed_values": {"pages_to_search": "1"}}, "walmart_product": {"dataset_id": "gd_l95fol7l1ru6rlo116", "description": "Quickly read structured walmart product data.\nRequires a valid product URL with /ip/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "walmart_seller": {"dataset_id": "gd_m7ke48w81ocyu4hhz0", "description": "Quickly read structured walmart seller data.\nRequires a valid walmart seller URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "ebay_product": {"dataset_id": "gd_ltr9mjt81n0zzdk1fb", "description": "Quickly read structured ebay product data.\nRequires a valid ebay product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "homedepot_products": {"dataset_id": "gd_lmusivh019i7g97q2n", "description": "Quickly read structured homedepot product data.\nRequires a valid homedepot product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "zara_products": {"dataset_id": "gd_lct4vafw1tgx27d4o0", "description": "Quickly read 
structured zara product data.\nRequires a valid zara product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "etsy_products": {"dataset_id": "gd_ltppk0jdv1jqz25mz", "description": "Quickly read structured etsy product data.\nRequires a valid etsy product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "bestbuy_products": {"dataset_id": "gd_ltre1jqe1jfr7cccf", "description": "Quickly read structured bestbuy product data.\nRequires a valid bestbuy product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_person_profile": {"dataset_id": "gd_l1viktl72bvl7bjuj0", "description": "Quickly read structured linkedin people profile data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_company_profile": {"dataset_id": "gd_l1vikfnt1wgvvqz95w", "description": "Quickly read structured linkedin company profile data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_job_listings": {"dataset_id": "gd_lpfll7v5hcqtkxl6l", "description": "Quickly read structured linkedin job listings data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_posts": {"dataset_id": "gd_lyy3tktm25m4avu764", "description": "Quickly read structured linkedin posts data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "linkedin_people_search": {"dataset_id": "gd_m8d03he47z8nwb5xc", "description": "Quickly read structured linkedin people search data.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url", "first_name", "last_name"]}, "crunchbase_company": {"dataset_id": "gd_l1vijqt9jfj7olije", "description": "Quickly read structured crunchbase company data.\nThis can be a cache lookup, so it can be more reliable than 
scraping.", "inputs": ["url"]}, "zoominfo_company_profile": {"dataset_id": "gd_m0ci4a4ivx3j5l6nx", "description": "Quickly read structured ZoomInfo company profile data.\nRequires a valid ZoomInfo company URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "instagram_profiles": {"dataset_id": "gd_l1vikfch901nx3by4", "description": "Quickly read structured Instagram profile data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "instagram_posts": {"dataset_id": "gd_lk5ns7kz21pck8jpis", "description": "Quickly read structured Instagram post data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "instagram_reels": {"dataset_id": "gd_lyclm20il4r5helnj", "description": "Quickly read structured Instagram reel data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "instagram_comments": {"dataset_id": "gd_ltppn085pokosxh13", "description": "Quickly read structured Instagram comments data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "facebook_posts": {"dataset_id": "gd_lyclm1571iy3mv57zw", "description": "Quickly read structured Facebook post data.\nRequires a valid Facebook post URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "facebook_marketplace_listings": {"dataset_id": "gd_lvt9iwuh6fbcwmx1a", "description": "Quickly read structured Facebook marketplace listing data.\nRequires a valid Facebook marketplace listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "facebook_company_reviews": {"dataset_id": "gd_m0dtqpiu1mbcyc2g86", "description": "Quickly read structured Facebook company reviews data.\nRequires a valid 
Facebook company URL and number of reviews.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url", "num_of_reviews"]}, "facebook_events": {"dataset_id": "gd_m14sd0to1jz48ppm51", "description": "Quickly read structured Facebook events data.\nRequires a valid Facebook event URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "tiktok_profiles": {"dataset_id": "gd_l1villgoiiidt09ci", "description": "Quickly read structured Tiktok profiles data.\nRequires a valid Tiktok profile URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "tiktok_posts": {"dataset_id": "gd_lu702nij2f790tmv9h", "description": "Quickly read structured Tiktok post data.\nRequires a valid Tiktok post URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "tiktok_shop": {"dataset_id": "gd_m45m1u911dsa4274pi", "description": "Quickly read structured Tiktok shop data.\nRequires a valid Tiktok shop product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "tiktok_comments": {"dataset_id": "gd_lkf2st302ap89utw5k", "description": "Quickly read structured Tiktok comments data.\nRequires a valid Tiktok video URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "google_maps_reviews": {"dataset_id": "gd_luzfs1dn2oa0teb81", "description": "Quickly read structured Google maps reviews data.\nRequires a valid Google maps URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url", "days_limit"], "defaults": {"days_limit": "3"}}, "google_shopping": {"dataset_id": "gd_ltppk50q18kdw67omz", "description": "Quickly read structured Google shopping data.\nRequires a valid Google shopping product URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "google_play_store": {"dataset_id": 
"gd_lsk382l8xei8vzm4u", "description": "Quickly read structured Google play store data.\nRequires a valid Google play store app URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "apple_app_store": {"dataset_id": "gd_lsk9ki3u2iishmwrui", "description": "Quickly read structured apple app store data.\nRequires a valid apple app store app URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "reuter_news": {"dataset_id": "gd_lyptx9h74wtlvpnfu", "description": "Quickly read structured reuter news data.\nRequires a valid reuter news report URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "github_repository_file": {"dataset_id": "gd_lyrexgxc24b3d4imjt", "description": "Quickly read structured github repository data.\nRequires a valid github repository file URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "yahoo_finance_business": {"dataset_id": "gd_lmrpz3vxmz972ghd7", "description": "Quickly read structured yahoo finance business data.\nRequires a valid yahoo finance business URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "x_posts": {"dataset_id": "gd_lwxkxvnf1cynvib9co", "description": "Quickly read structured X post data.\nRequires a valid X post URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "zillow_properties_listing": {"dataset_id": "gd_lfqkr8wm13ixtbd8f5", "description": "Quickly read structured zillow properties listing data.\nRequires a valid zillow properties listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "booking_hotel_listings": {"dataset_id": "gd_m5mbdl081229ln6t4a", "description": "Quickly read structured booking hotel listings data.\nRequires a valid booking hotel listing URL.\nThis can be a cache lookup, so it 
can be more reliable than scraping.", "inputs": ["url"]}, "youtube_profiles": {"dataset_id": "gd_lk538t2k2p1k3oos71", "description": "Quickly read structured youtube profiles data.\nRequires a valid youtube profile URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "youtube_comments": {"dataset_id": "gd_lk9q0ew71spt1mxywf", "description": "Quickly read structured youtube comments data.\nRequires a valid youtube video URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url", "num_of_comments"], "defaults": {"num_of_comments": "10"}}, "reddit_posts": {"dataset_id": "gd_lvz8ah06191smkebj4", "description": "Quickly read structured reddit posts data.\nRequires a valid reddit post URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}, "youtube_videos": {"dataset_id": "gd_lk56epmy2i5g7lzu0k", "description": "Quickly read structured YouTube videos data.\nRequires a valid YouTube video URL.\nThis can be a cache lookup, so it can be more reliable than scraping.", "inputs": ["url"]}}'''
13
+
14
# Decode the embedded JSON catalogue once, at import time.
DATASETS: Dict[str, Any] = json.loads(DATASETS_JSON)
# Dataset key -> names of the input fields that dataset declares.
DATASET_FIELDS: Dict[str, List[str]] = {
    dataset_key: spec["inputs"] for dataset_key, spec in DATASETS.items()
}
# Dataset keys in alphabetical order.
DATASET_CHOICES = sorted(DATASETS)
17
+
18
 
19
  class BrightDataDatasetTool(Tool):
20
  name = "brightdata_dataset_fetch"
21
+ description = "Trigger a Bright Data dataset collection and poll until the snapshot is ready."
22
  output_type = "string"
23
 
24
+ def __init__(self, datasets: Optional[Dict[str, Any]] = None) -> None:
25
+ self.datasets = datasets or DATASETS
 
 
 
 
 
26
  self.inputs = {
27
  "dataset": {
28
  "type": "string",
29
  "description": "Dataset key",
 
30
  "enum": sorted(self.datasets.keys()),
31
  },
32
  "url": {
33
  "type": "string",
34
+ "description": "URL for the dataset",
35
  "nullable": True,
36
  },
37
  "keyword": {
38
  "type": "string",
39
+ "description": "Search keyword",
40
  "nullable": True,
41
  },
42
  "first_name": {
43
  "type": "string",
44
+ "description": "First name",
45
  "nullable": True,
46
  },
47
  "last_name": {
48
  "type": "string",
49
+ "description": "Last name",
50
  "nullable": True,
51
  },
52
  "days_limit": {
53
  "type": "string",
54
+ "description": "Days limit",
55
  "nullable": True,
56
  },
57
  "num_of_reviews": {
58
  "type": "string",
59
+ "description": "Number of reviews",
60
  "nullable": True,
61
  },
62
  "num_of_comments": {
63
  "type": "string",
64
+ "description": "Number of comments",
65
  "nullable": True,
66
  },
67
  }
68
  super().__init__()
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  def forward(
71
  self,
72
  dataset: str,
73
+ url: Optional[str] = None,
74
+ keyword: Optional[str] = None,
75
+ first_name: Optional[str] = None,
76
+ last_name: Optional[str] = None,
77
+ days_limit: Optional[str] = None,
78
+ num_of_reviews: Optional[str] = None,
79
+ num_of_comments: Optional[str] = None,
80
  ) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
82
  if not api_token:
83
  raise ValueError("BRIGHT_DATA_API_TOKEN not found in environment variables")
 
85
  if dataset not in self.datasets:
86
  raise ValueError(f"Unknown dataset '{dataset}'. Valid options: {', '.join(sorted(self.datasets.keys()))}")
87
 
88
+ params = self._build_params(
89
+ url=url,
90
+ keyword=keyword,
91
+ first_name=first_name,
92
+ last_name=last_name,
93
+ days_limit=days_limit,
94
+ num_of_reviews=num_of_reviews,
95
+ num_of_comments=num_of_comments,
96
+ )
97
+
98
+ payload = self._prepare_payload(dataset, params)
99
+
100
+ try:
101
+ snapshot_id = self._trigger_snapshot(dataset, payload, api_token)
102
+ data = self._poll_snapshot(snapshot_id, api_token)
103
+ return json.dumps(data, indent=2)
104
+ except requests.exceptions.RequestException as exc:
105
+ return json.dumps({"error": str(exc)})
106
+
107
+ def _build_params(
108
+ self,
109
+ url: Optional[str],
110
+ keyword: Optional[str],
111
+ first_name: Optional[str],
112
+ last_name: Optional[str],
113
+ days_limit: Optional[str],
114
+ num_of_reviews: Optional[str],
115
+ num_of_comments: Optional[str],
116
+ ) -> Dict[str, str]:
117
+ params: Dict[str, str] = {}
118
  if url is not None:
119
  params["url"] = url
120
  if keyword is not None:
 
129
  params["num_of_reviews"] = num_of_reviews
130
  if num_of_comments is not None:
131
  params["num_of_comments"] = num_of_comments
132
+ return params
133
 
134
+ def _prepare_payload(self, dataset_key: str, params: Dict[str, str]) -> Dict[str, str]:
135
+ config = self.datasets[dataset_key]
136
+ payload: Dict[str, str] = {}
137
 
138
+ defaults = config.get("defaults", {})
139
+ fixed_values = config.get("fixed_values", {})
140
+
141
+ for field in config["inputs"]:
142
+ if field in params:
143
+ payload[field] = params[field]
144
+ elif field in defaults:
145
+ payload[field] = defaults[field]
146
+ else:
147
+ raise ValueError(f"Missing required field '{field}' for dataset '{dataset_key}'")
148
+
149
+ payload.update(fixed_values)
150
+ return payload
151
 
152
+ def _trigger_snapshot(self, dataset_key: str, payload: Dict[str, str], api_token: str) -> str:
153
+ dataset_id = self.datasets[dataset_key]["dataset_id"]
154
+ trigger_url = "https://api.brightdata.com/datasets/v3/trigger"
155
+ response = requests.post(
156
  trigger_url,
157
  params={"dataset_id": dataset_id, "include_errors": "true"},
158
  json=[payload],
159
+ headers={
160
+ "Authorization": f"Bearer {api_token}",
161
+ "Content-Type": "application/json",
162
+ },
163
  timeout=60,
164
  )
165
+ response.raise_for_status()
166
+ snapshot_id = response.json().get("snapshot_id")
 
167
  if not snapshot_id:
168
  raise RuntimeError("No snapshot ID returned from Bright Data.")
169
+ return snapshot_id
170
 
171
+ def _poll_snapshot(self, snapshot_id: str, api_token: str) -> Any:
172
  snapshot_url = f"https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id}"
173
  max_attempts = 600
174
  attempts = 0
175
 
176
  while attempts < max_attempts:
177
+ response = requests.get(
178
+ snapshot_url,
179
+ params={"format": "json"},
180
+ headers={"Authorization": f"Bearer {api_token}"},
181
+ timeout=30,
182
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
+ if response.status_code == 400:
185
+ response.raise_for_status()
186
 
187
+ data = response.json()
188
+ if isinstance(data, list):
189
+ return data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
+ status = data.get("status") if isinstance(data, dict) else None
192
+ if status not in {"running", "building"}:
193
+ return data
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
+ attempts += 1
196
+ time.sleep(1)
197
+
198
+ raise TimeoutError(f"Timeout waiting for snapshot {snapshot_id} after {max_attempts} seconds")
 
 
 
199
 
200
+ def _get_gradio_app_code(self, tool_module_name: str = "tool") -> str:
201
+ choices = sorted(self.datasets.keys())
202
+ dataset_fields = {key: value["inputs"] for key, value in self.datasets.items()}
203
+ return f"""import gradio as gr
204
+ import importlib
205
+
206
+ BrightDataDatasetTool = importlib.import_module("{tool_module_name}").BrightDataDatasetTool
207
+ tool = BrightDataDatasetTool()
208
+
209
+ DATASET_FIELDS = {dataset_fields}
210
+ CHOICES = {choices}
211
+
212
+ def toggle_fields(selected):
213
+ inputs = ["url", "keyword", "first_name", "last_name", "days_limit", "num_of_reviews", "num_of_comments"]
214
+ wanted = set(DATASET_FIELDS.get(selected, []))
215
+ def vis(name):
216
+ return gr.update(visible=name in wanted)
217
+ return tuple(vis(name) for name in inputs)
218
+
219
+ def run(dataset, url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments):
220
+ return tool(
221
+ dataset=dataset,
222
+ url=url,
223
+ keyword=keyword,
224
+ first_name=first_name,
225
+ last_name=last_name,
226
+ days_limit=days_limit,
227
+ num_of_reviews=num_of_reviews,
228
+ num_of_comments=num_of_comments,
229
+ )
230
+
231
+ with gr.Blocks() as demo:
232
+ gr.Markdown("### Bright Data dataset fetch")
233
+ dataset = gr.Dropdown(choices=CHOICES, label="Dataset", value=CHOICES[0])
234
+ url = gr.Textbox(label="URL", placeholder="https://...", visible=True)
235
+ keyword = gr.Textbox(label="Keyword", visible=False)
236
+ first_name = gr.Textbox(label="First name", visible=False)
237
+ last_name = gr.Textbox(label="Last name", visible=False)
238
+ days_limit = gr.Textbox(label="Days limit (e.g. 3)", visible=False)
239
+ num_of_reviews = gr.Textbox(label="Number of reviews", visible=False)
240
+ num_of_comments = gr.Textbox(label="Number of comments", visible=False)
241
+
242
+ dataset.change(
243
+ toggle_fields,
244
+ inputs=[dataset],
245
+ outputs=[url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments],
246
+ )
247
+
248
+ run_btn = gr.Button("Run")
249
+ output = gr.Textbox(label="Output", lines=12)
250
+ run_btn.click(
251
+ run,
252
+ inputs=[dataset, url, keyword, first_name, last_name, days_limit, num_of_reviews, num_of_comments],
253
+ outputs=output,
254
+ )
255
+
256
+ demo.launch()
257
+ """