Spaces:
Runtime error
Runtime error
xxx
Browse files- my_tools.py +65 -35
my_tools.py
CHANGED
|
@@ -267,50 +267,52 @@ def read_excel_data(file_path: str, sheet_name=0) -> str:
|
|
| 267 |
|
| 268 |
def classify_botanical(items_list_str: str) -> str:
|
| 269 |
"""
|
| 270 |
-
|
|
|
|
|
|
|
| 271 |
"""
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
"
|
| 276 |
-
"
|
| 277 |
-
"
|
| 278 |
-
"spinach": "espinaca", "kale": "col rizada",
|
| 279 |
-
"sweet potatoes": "batata", "sweet potato": "batata", "potato": "patata",
|
| 280 |
-
"onion": "cebolla", "garlic": "ajo", "carrot": "zanahoria",
|
| 281 |
-
"okra": "okra", "cabbage": "col", "cauliflower": "coliflor"
|
| 282 |
-
}
|
| 283 |
-
vegetables_es = {
|
| 284 |
-
"zanahoria","patata","batata","cebolla","ajo","puerro","apio",
|
| 285 |
-
"lechuga","espinaca","brócoli","calabacín","berenjena","pepino",
|
| 286 |
-
"judía verde","maíz","okra","col rizada","col","coliflor"
|
| 287 |
}
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
"
|
|
|
|
|
|
|
| 291 |
}
|
| 292 |
|
| 293 |
-
|
| 294 |
-
for
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
| 298 |
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
|
|
|
| 306 |
|
| 307 |
return (
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
)
|
| 312 |
|
| 313 |
-
|
|
|
|
| 314 |
"""
|
| 315 |
Busca una sección en una página de Wikipedia y extrae la tabla indicada (por índice).
|
| 316 |
Devuelve el CSV.
|
|
@@ -338,6 +340,34 @@ def scrape_wikipedia_table(page_title: str, section: str, table_index: int = 0)
|
|
| 338 |
except Exception as e:
|
| 339 |
return f"Error scrape_wiki_table: {e}"
|
| 340 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
# -------------------------------------------------------------------
|
| 342 |
# 4) ENVUELTORES DE HERRAMIENTAS (FunctionTool)
|
| 343 |
# -------------------------------------------------------------------
|
|
|
|
| 267 |
|
| 268 |
def classify_botanical(items_list_str: str) -> str:
    """
    Split a comma-separated list of foods (English names) into botanical groups.

    Each token is stripped and lower-cased, then matched exactly against two
    fixed lookup sets. Blank tokens (empty input, trailing or doubled commas)
    are ignored. Names found in neither set are reported under "Others".

    Args:
        items_list_str: Comma-separated food names, e.g. "Carrot, tomato, milk".

    Returns:
        Three lines -- "Vegetables: ...", "Fruits: ...", "Others: ..." -- each
        holding a de-duplicated, alphabetically sorted, comma-separated list.
        Nothing that is a botanical fruit appears in the Vegetables list.
    """
    # --- botanical criteria -------------------------------------------------
    # Culinary "vegetables" that are botanically fruits (seed-bearing
    # structures). Okra belongs here: the edible pod is the plant's fruit.
    botanical_fruits = {
        "tomato", "bell pepper", "pepper", "green beans", "beans", "zucchini",
        "cucumber", "eggplant", "corn", "peas", "pea", "pumpkin", "squash",
        "avocado", "okra",
    }
    # True vegetables: roots, tubers, bulbs, stems, leaves and flower heads.
    botanical_vegetables = {
        "broccoli", "celery", "lettuce", "kale", "spinach", "sweet potatoes",
        "sweet potato", "potato", "onion", "garlic", "carrot",
        "cabbage", "cauliflower", "beet", "turnip", "parsnip", "leek",
    }
    # -----------------------------------------------------------------------

    # Normalise and drop blank tokens so stray commas never produce an empty
    # entry in the output; a set also takes care of de-duplication.
    items = {
        token.strip().lower()
        for token in items_list_str.split(",")
        if token.strip()
    }

    # The two lookup sets are disjoint, so plain set algebra partitions items.
    vegetables = sorted(items & botanical_vegetables)
    fruits = sorted(items & botanical_fruits)
    others = sorted(items - botanical_vegetables - botanical_fruits)

    return (
        f"Vegetables: {', '.join(vegetables)}\n"
        f"Fruits: {', '.join(fruits)}\n"
        f"Others: {', '.join(others)}"
    )
|
| 313 |
|
| 314 |
+
|
| 315 |
+
def scrape_wikipedia_table_deprecated(page_title: str, section: str, table_index: int = 0) -> str:
|
| 316 |
"""
|
| 317 |
Busca una sección en una página de Wikipedia y extrae la tabla indicada (por índice).
|
| 318 |
Devuelve el CSV.
|
|
|
|
| 340 |
except Exception as e:
|
| 341 |
return f"Error scrape_wiki_table: {e}"
|
| 342 |
|
| 343 |
+
def scrape_wikipedia_table(page_title: str,
                           section: str | None = None,
                           table_index: int = 0) -> str:
    """
    Fetch an English-Wikipedia page and return one of its tables as Markdown.

    Only tables carrying the ``wikitable`` CSS class are considered.

    Args:
        page_title: Article title; spaces are converted to underscores.
        section: Anchor id of a section heading (e.g. "Studio_albums"). Spaces
            are also accepted and matched against the underscore form. When
            None or empty, the whole page is searched.
        table_index: Zero-based position of the table, counted among the
            wikitables that follow the section heading (or among all
            wikitables on the page when no section is given).

    Returns:
        The selected table rendered with ``DataFrame.to_markdown(index=False)``.

    Raises:
        ValueError: If the page request returns an HTTP error status, the
            section is not found, or ``table_index`` is out of range.
    """
    from io import StringIO

    base_url = "https://en.wikipedia.org/wiki/"
    url = base_url + page_title.replace(" ", "_")

    # Wikimedia asks clients to identify themselves; requests with a generic
    # library User-Agent may be rejected.
    headers = {"User-Agent": "my_tools/1.0 (scrape_wikipedia_table; python-requests)"}
    response = requests.get(url, headers=headers, timeout=15)
    if not response.ok:
        # Fail here instead of parsing an error page, which would surface
        # later as a misleading "section/table not found" message.
        raise ValueError(f"HTTP {response.status_code} while fetching '{url}'")

    soup = BeautifulSoup(response.text, "html.parser")

    # Locate candidate tables.
    if section:
        # Heading anchor ids use underscores instead of spaces, so retry with
        # the normalised form when the literal id is not present.
        header = soup.find(id=section) or soup.find(id=section.replace(" ", "_"))
        if not header:
            raise ValueError(f"Section '{section}' not found.")
        # find_all_next is not limited to this section: it returns every
        # wikitable that appears after the heading in document order.
        tables = header.find_all_next("table", class_="wikitable")
    else:
        tables = soup.find_all("table", class_="wikitable")

    if not tables or table_index >= len(tables):
        raise ValueError(f"Table index {table_index} out of range (found {len(tables)})")

    # Wrap the markup in StringIO: passing literal HTML strings to read_html
    # is deprecated in recent pandas releases.
    df = pd.read_html(StringIO(str(tables[table_index])), flavor="bs4")[0]
    return df.to_markdown(index=False)
|
| 370 |
+
|
| 371 |
# -------------------------------------------------------------------
|
| 372 |
# 4) ENVUELTORES DE HERRAMIENTAS (FunctionTool)
|
| 373 |
# -------------------------------------------------------------------
|