Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,40 @@ from bs4 import BeautifulSoup
|
|
| 9 |
from Gradio_UI import GradioUI
|
| 10 |
|
| 11 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
@tool
|
| 13 |
def extract_metadata_from_url(url: str) -> dict:
|
| 14 |
"""Extrae todos los metadatos de una página web.
|
|
@@ -78,7 +112,7 @@ with open("prompts.yaml", 'r') as stream:
|
|
| 78 |
|
| 79 |
agent = CodeAgent(
|
| 80 |
model=model,
|
| 81 |
-
tools=[final_answer, extract_metadata_from_url], ## add your tools here (don't remove final answer)
|
| 82 |
max_steps=6,
|
| 83 |
verbosity_level=1,
|
| 84 |
grammar=None,
|
|
|
|
| 9 |
from Gradio_UI import GradioUI
|
| 10 |
|
| 11 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
| 12 |
+
@tool
def scrape_webpage(url: str, tag: str = "p", class_name: str = None) -> dict:
    """Extract text content from a web page by HTML tag and optional CSS class.

    Args:
        url: URL of the page to scrape.
        tag: HTML tag to extract (defaults to "p").
        class_name: Optional CSS class used to filter the matched elements.

    Returns:
        A dictionary with the keys "url" and "scraped_data" (the stripped text
        of at most the first 20 matching elements), or a dictionary with a
        single "error" key when the request or the parsing fails.
    """
    # NOTE(review): the @tool decorator presumably reads this docstring to
    # build the tool description, so keep the "Args:" section in sync with
    # the signature -- confirm against the smolagents documentation.
    max_results = 20      # cap the payload handed back to the agent
    request_timeout = 10  # seconds; without it a stalled host blocks forever

    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        # Timeout errors derive from RequestException, so they are reported
        # through the same "error" dictionary as any other network failure.
        response = requests.get(url, headers=headers, timeout=request_timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Only pass class_ when a class was actually requested; a falsy
        # class_name (None or "") means "match every <tag> element".
        if class_name:
            elements = soup.find_all(tag, class_=class_name)
        else:
            elements = soup.find_all(tag)

        extracted_data = [element.get_text(strip=True) for element in elements]

        # Limit the output to the first max_results (20) entries.
        return {"url": url, "scraped_data": extracted_data[:max_results]}

    except requests.exceptions.RequestException as e:
        return {"error": f"Error al acceder a la URL: {str(e)}"}
    except Exception as e:
        # Top-level tool boundary: report the failure to the agent instead of
        # raising, matching the error-dictionary contract above.
        return {"error": f"Error inesperado: {str(e)}"}
|
| 44 |
+
|
| 45 |
+
|
| 46 |
@tool
|
| 47 |
def extract_metadata_from_url(url: str) -> dict:
|
| 48 |
"""Extrae todos los metadatos de una página web.
|
|
|
|
| 112 |
|
| 113 |
agent = CodeAgent(
|
| 114 |
model=model,
|
| 115 |
+
tools=[final_answer, extract_metadata_from_url, scrape_webpage], ## add your tools here (don't remove final answer)
|
| 116 |
max_steps=6,
|
| 117 |
verbosity_level=1,
|
| 118 |
grammar=None,
|