Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files
- myTools/ExtractWebContentWithSelenium.py +42 -0
- myTools/ExtractWikipediaSection.py +42 -0
- myTools/GetLogo.py +52 -0
- myTools/GetPlaceholderImageTool.py +18 -0
- myTools/GetSVG.py +53 -0
- myTools/GetSVGList.py +50 -0
- myTools/__init__.py +0 -0
myTools/ExtractWebContentWithSelenium.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
from selenium import webdriver
|
| 3 |
+
from selenium.webdriver.chrome.options import Options
|
| 4 |
+
from selenium.webdriver.chrome.service import Service as ChromeService
|
| 5 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 6 |
+
from markdownify import markdownify as md
|
| 7 |
+
|
| 8 |
+
class ExtractWebContentWithSelenium(Tool):
    """Tool that loads a page in headless Chrome and returns it as Markdown."""

    name = "extract_web_content_selenium"
    description = "Visit a webpage and extract the full HTML content of a web page."

    inputs = {
        "url": {"type": "string", "description": "URL of the page to load"},
    }

    output_type = "string"

    def forward(self, url: str) -> str:
        """Load ``url`` in headless Chrome and convert the page source to Markdown.

        Args:
            url: URL of the page to load.

        Returns:
            The page source converted by markdownify using ATX-style headings.
        """
        browser_options = Options()
        for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
            browser_options.add_argument(flag)
        browser_options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/114.0.0.0 Safari/537.36"
        )

        # webdriver_manager installs ChromeDriver automatically.
        driver_service = ChromeService(ChromeDriverManager().install())
        browser = webdriver.Chrome(service=driver_service, options=browser_options)

        try:
            browser.get(url)
            return md(browser.page_source, heading_style="ATX")
        finally:
            # Always shut the browser down, even if loading or conversion fails.
            browser.quit()
|
myTools/ExtractWikipediaSection.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
import requests
|
| 3 |
+
import re
|
| 4 |
+
from markdownify import markdownify as md
|
| 5 |
+
|
| 6 |
+
class ExtractWikipediaSection(Tool):
    """Tool that downloads a page and returns one of its sections as Markdown."""

    name = "extract_wikipedia_section"
    description = "Extracts a specific section from a Wikipedia page in Markdown format."

    inputs = {
        "url": {
            "type": "string",
            "description": "URL of the Wikipedia page"
        },
        "section": {
            "type": "string",
            "description": "Title of the section to extract"
        },
    }

    output_type = "string"

    def forward(self, url: str, section: str) -> str:
        """Fetch ``url``, convert it to Markdown and return the body of ``section``.

        The section starts after a heading of level 2 or deeper whose text is
        exactly ``section``. It ends at the next heading of the same or a
        higher level, or at the end of the document, so nested subsections
        are kept and the last section of a page can be extracted.

        Args:
            url: URL of the Wikipedia page.
            section: Title of the section to extract.

        Returns:
            The stripped Markdown body of the section, or a "not found"
            message when no matching heading exists.

        Raises:
            RuntimeError: If the page cannot be fetched or the server answers
                with an HTTP error status.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; WebScraper/1.0; +https://example.com/bot)"
        }

        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
        except Exception as e:
            # Keep the RuntimeError contract, but chain the cause for debugging.
            raise RuntimeError(f"Failed to fetch page: {e}") from e

        markdown = md(response.text, heading_style="ATX")

        # Locate the Markdown heading that opens the requested section.
        heading = re.search(
            rf"^(#{{2,}})\s*{re.escape(section)}\s*$",
            markdown,
            re.MULTILINE,
        )
        if heading is None:
            return f"❌ Section '{section}' not found on page."

        # Only a heading with at most as many '#' as the opening one closes the
        # section; deeper headings are subsections and belong to the body.
        level = len(heading.group(1))
        closing_heading = re.compile(rf"^#{{1,{level}}}(?!#)", re.MULTILINE)
        closing = closing_heading.search(markdown, heading.end())
        stop = closing.start() if closing else len(markdown)

        return markdown[heading.end():stop].strip()
|
myTools/GetLogo.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
from selenium import webdriver
|
| 3 |
+
from selenium.webdriver.chrome.options import Options
|
| 4 |
+
from selenium.webdriver.chrome.service import Service as ChromeService
|
| 5 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 6 |
+
from markdownify import markdownify as md
|
| 7 |
+
from selenium.webdriver.support.ui import WebDriverWait
|
| 8 |
+
from selenium.webdriver.support import expected_conditions as EC
|
| 9 |
+
from selenium.webdriver.common.by import By
|
| 10 |
+
import random
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class GetLogo(Tool):
    """Tool that returns the SVG markup of a random logo from logoipsum.com."""

    name = "get_logo"
    description = "Lookup and search for a logo and generate the SVG code"

    inputs = {}

    output_type = "string"

    def forward(self) -> str:
        """Open logoipsum.com in headless Chrome and return one random logo.

        Returns:
            The ``outerHTML`` of the ``<svg>`` element inside a randomly
            chosen ``div.artwork`` element, or ``"Aucun résultat trouvé."``
            when no such element appears within 10 seconds.
        """
        # Local import: the file-level imports do not provide this name.
        from selenium.common.exceptions import TimeoutException

        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/114.0.0.0 Safari/537.36"
        )

        driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=chrome_options,
        )

        try:
            driver.get("https://logoipsum.com/")
            wait = WebDriverWait(driver, 10)
            try:
                # until() never returns an empty list: it keeps polling and
                # raises TimeoutException, so "no result" is handled here.
                artworks = wait.until(
                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.artwork"))
                )
            except TimeoutException:
                return "Aucun résultat trouvé."

            random_logo = random.choice(artworks)
            svg_element = random_logo.find_element(By.TAG_NAME, "svg")
            return svg_element.get_attribute("outerHTML")
        finally:
            # Always release the browser, whatever happened above.
            driver.quit()
|
myTools/GetPlaceholderImageTool.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
import os
|
| 3 |
+
import requests
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class GetPlaceholderImageTool(Tool):
    """Tool that builds a placehold.co image URL for a given size."""

    name = "get_placeholder_image_tool"
    description = "Get placeholder image for website with specific size"

    inputs = {
        "height": {
            "type": "integer",
            "description": "The height of the placeholder",
        },
        "width": {
            "type": "integer",
            "description": "The width of the placeholder",
        },
    }

    output_type = "string"

    def forward(self, height: int, width: int) -> str:
        """Return the placehold.co URL for a ``width`` x ``height`` image.

        Args:
            height: The height of the placeholder.
            width: The width of the placeholder.

        Returns:
            A URL of the form ``https://placehold.co/<width>x<height>``.
        """
        dimensions = f"{width}x{height}"
        return f"https://placehold.co/{dimensions}"
|
myTools/GetSVG.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
from selenium import webdriver
|
| 3 |
+
from selenium.webdriver.chrome.options import Options
|
| 4 |
+
from selenium.webdriver.chrome.service import Service as ChromeService
|
| 5 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 6 |
+
from markdownify import markdownify as md
|
| 7 |
+
from selenium.webdriver.support.ui import WebDriverWait
|
| 8 |
+
from selenium.webdriver.support import expected_conditions as EC
|
| 9 |
+
from selenium.webdriver.common.by import By
|
| 10 |
+
|
| 11 |
+
class GetSVG(Tool):
    """Tool that searches phosphoricons.com and returns the first icon's SVG."""

    name = "get_svg"
    description = "Lookup and search for an icon and generate the SVG code"

    inputs = {
        "request": {
            "type": "string",
            "description": "The description of the svg from GetSVGList Tool"
        }
    }

    output_type = "string"

    def forward(self, request: str) -> str:
        """Search phosphoricons.com for ``request`` and return the first icon.

        Args:
            request: The description of the svg from GetSVGList Tool.

        Returns:
            The ``outerHTML`` of the ``<svg>`` element inside the first
            ``button.grid-item`` element, or ``"Aucun résultat trouvé."``
            when no such element appears within 10 seconds.
        """
        # Local imports: the file-level imports do not provide these names.
        from urllib.parse import quote

        from selenium.common.exceptions import TimeoutException

        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/114.0.0.0 Safari/537.36"
        )

        driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=chrome_options,
        )

        try:
            # Percent-encode the term so spaces, '&', '#', ... stay inside ?q=.
            url = f"https://phosphoricons.com/?q={quote(request, safe='')}"
            driver.get(url)
            wait = WebDriverWait(driver, 10)
            try:
                # until() never returns an empty list: it keeps polling and
                # raises TimeoutException, so "no result" is handled here.
                buttons = wait.until(
                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "button.grid-item"))
                )
            except TimeoutException:
                return "Aucun résultat trouvé."

            first_icon = buttons[0]
            svg_element = first_icon.find_element(By.TAG_NAME, "svg")
            return svg_element.get_attribute("outerHTML")
        finally:
            # Always release the browser, whatever happened above.
            driver.quit()
|
myTools/GetSVGList.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
from selenium import webdriver
|
| 3 |
+
from selenium.webdriver.chrome.options import Options
|
| 4 |
+
from selenium.webdriver.chrome.service import Service as ChromeService
|
| 5 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 6 |
+
from markdownify import markdownify as md
|
| 7 |
+
from selenium.webdriver.support.ui import WebDriverWait
|
| 8 |
+
from selenium.webdriver.support import expected_conditions as EC
|
| 9 |
+
from selenium.webdriver.common.by import By
|
| 10 |
+
|
| 11 |
+
class GetSVGList(Tool):
    """Tool that lists the icon names shown on phosphoricons.com."""

    name = "get_svg_list"
    description = "Get list of svg available to generate it with GetSVG Tool"

    inputs = {}

    output_type = "array"

    def forward(self) -> list[str]:
        """Open phosphoricons.com in headless Chrome and collect icon names.

        Returns:
            The stripped, non-empty text of every ``span.name`` element found
            on the page. The list is empty when all of them have blank text.

        Raises:
            selenium.common.exceptions.TimeoutException: If no ``span.name``
                element appears within 10 seconds.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/114.0.0.0 Safari/537.36"
        )

        driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=chrome_options,
        )

        try:
            driver.get("https://phosphoricons.com/")

            wait = WebDriverWait(driver, 10)
            name_elements = wait.until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.name"))
            )

            # Always return a list, matching output_type = "array". The old
            # string fallbacks after this point were unreachable.
            stripped_names = (el.text.strip() for el in name_elements)
            return [icon_name for icon_name in stripped_names if icon_name]
        finally:
            # Always release the browser, whatever happened above.
            driver.quit()
|
myTools/__init__.py
ADDED
|
File without changes
|