Spaces:
Paused
Paused
Update tools/scrape_text.py
Browse files- tools/scrape_text.py +8 -0
tools/scrape_text.py
CHANGED
|
@@ -2,6 +2,9 @@ from smolagents.tools import Tool
|
|
| 2 |
from helium import S
|
| 3 |
from selenium.webdriver.common.by import By
|
| 4 |
import json
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
class ScrapeTextTool(Tool):
|
| 7 |
name = "scrape_text"
|
|
@@ -13,9 +16,14 @@ class ScrapeTextTool(Tool):
|
|
| 13 |
output_type = "string"
|
| 14 |
|
| 15 |
def __init__(self, driver):
|
|
|
|
| 16 |
self.driver = driver
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def forward(self, selector="p", extract_table=False):
|
|
|
|
|
|
|
| 19 |
try:
|
| 20 |
if extract_table:
|
| 21 |
tables = self.driver.find_elements(By.CSS_SELECTOR, selector)
|
|
|
|
| 2 |
from helium import S
|
| 3 |
from selenium.webdriver.common.by import By
|
| 4 |
import json
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger(__name__)
|
| 8 |
|
| 9 |
class ScrapeTextTool(Tool):
|
| 10 |
name = "scrape_text"
|
|
|
|
| 16 |
output_type = "string"
|
| 17 |
|
| 18 |
def __init__(self, driver):
|
| 19 |
+
super().__init__()
|
| 20 |
self.driver = driver
|
| 21 |
+
self.is_initialized = self.driver is not None
|
| 22 |
+
logger.debug(f"ScrapeTextTool initialized: is_initialized={self.is_initialized}")
|
| 23 |
|
| 24 |
def forward(self, selector="p", extract_table=False):
|
| 25 |
+
if not self.is_initialized:
|
| 26 |
+
return "Error: ScrapeTextTool is not initialized"
|
| 27 |
try:
|
| 28 |
if extract_table:
|
| 29 |
tables = self.driver.find_elements(By.CSS_SELECTOR, selector)
|