Spaces:
Sleeping
Sleeping
| from smolagents import Tool | |
| import requests | |
| import re | |
| from markdownify import markdownify as md | |
class ExtractWikipediaSection(Tool):
    """smolagents tool that returns one section of a web page as Markdown.

    The page is downloaded with ``requests``, converted to Markdown with
    ``markdownify`` (ATX ``#`` headings), and the text under the requested
    heading is returned.
    """

    name = "extract_wikipedia_section"
    description = "Extracts a specific section from a Wikipedia page in Markdown format."
    inputs = {
        "url": {
            "type": "string",
            "description": "URL of the Wikipedia page",
        },
        "section": {
            "type": "string",
            "description": "Title of the section to extract",
        },
    }
    output_type = "string"

    def forward(self, url: str, section: str) -> str:
        """Fetch ``url`` and return the Markdown body of ``section``.

        Args:
            url: Address of the page to download.
            section: Heading title to look for. Surrounding whitespace is
                ignored; the match is otherwise exact and case-sensitive.

        Returns:
            The stripped Markdown text found under the heading, including any
            deeper-level subsections, or a "not found" message when no
            heading of level two or deeper carries that title.

        Raises:
            RuntimeError: If the HTTP request fails or returns an error status.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; WebScraper/1.0; +https://example.com/bot)"
        }
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
        except Exception as e:
            # Single failure type for callers; keep the original cause chained.
            raise RuntimeError(f"Failed to fetch page: {e}") from e

        markdown = md(response.text, heading_style="ATX")

        # Locate the heading line of the requested section (level 2 or deeper)
        # and remember how many '#' it uses.
        heading = re.search(
            rf"^(##+)\s*{re.escape(section.strip())}\s*$",
            markdown,
            re.MULTILINE,
        )
        if heading is None:
            return f"❌ Section '{section}' not found on page."

        # The section ends at the next heading of the same or a shallower
        # level, so nested subsections stay part of the result. When no such
        # heading follows (last section of the page), it runs to the end of
        # the document instead of being reported as missing.
        level = len(heading.group(1))
        closing_heading = re.compile(rf"^#{{2,{level}}}(?!#)", re.MULTILINE)
        following = closing_heading.search(markdown, heading.end())
        end = following.start() if following else len(markdown)

        return markdown[heading.end():end].strip()