Nancy1906 committed on
Commit
8aab43c
·
verified ·
1 Parent(s): 0524371
Files changed (1) hide show
  1. my_tools.py +65 -35
my_tools.py CHANGED
@@ -267,50 +267,52 @@ def read_excel_data(file_path: str, sheet_name=0) -> str:
267
 
268
  def classify_botanical(items_list_str: str) -> str:
269
  """
270
- Clasifica botánicamente una lista de alimentos (en inglés o español) en Verduras, Frutas u Otros.
 
 
271
  """
272
- mapping = {
273
- "tomato": "tomate", "pepper": "pimiento", "bell pepper": "pimiento",
274
- "green beans": "judía verde", "beans": "judía verde",
275
- "zucchini": "calabacín", "eggplant": "berenjena", "cucumber": "pepino",
276
- "broccoli": "brócoli", "celery": "apio", "lettuce": "lechuga",
277
- "corn": "maíz", "peas": "guisante", "pea": "guisante",
278
- "spinach": "espinaca", "kale": "col rizada",
279
- "sweet potatoes": "batata", "sweet potato": "batata", "potato": "patata",
280
- "onion": "cebolla", "garlic": "ajo", "carrot": "zanahoria",
281
- "okra": "okra", "cabbage": "col", "cauliflower": "coliflor"
282
- }
283
- vegetables_es = {
284
- "zanahoria","patata","batata","cebolla","ajo","puerro","apio",
285
- "lechuga","espinaca","brócoli","calabacín","berenjena","pepino",
286
- "judía verde","maíz","okra","col rizada","col","coliflor"
287
  }
288
- fruits_es = {
289
- "tomate","pepino","calabacín","berenjena","pimiento","aguacate",
290
- "calabaza","guisante","judía verde","maíz"
 
 
291
  }
292
 
293
- items = []
294
- for raw in items_list_str.split(','):
295
- itm = raw.strip().lower()
296
- itm_es = mapping.get(itm, itm)
297
- items.append(itm_es)
 
298
 
299
- vegs = [i for i in items if i in vegetables_es and i not in fruits_es]
300
- fruits_found = [i for i in items if i in fruits_es]
301
- others = [i for i in items if i not in vegetables_es and i not in fruits_es]
 
 
 
 
302
 
303
- vegs_sorted = sorted(set(vegs))
304
- fruits_sorted = sorted(set(fruits_found))
305
- others_sorted = sorted(set(others))
 
306
 
307
  return (
308
- f"Vegetables: {', '.join(vegs_sorted)}\n"
309
- f"Fruits: {', '.join(fruits_sorted)}\n"
310
- f"Others: {', '.join(others_sorted)}"
311
- )
312
 
313
- def scrape_wikipedia_table(page_title: str, section: str, table_index: int = 0) -> str:
 
314
  """
315
  Busca una sección en una página de Wikipedia y extrae la tabla indicada (por índice).
316
  Devuelve el CSV.
@@ -338,6 +340,34 @@ def scrape_wikipedia_table(page_title: str, section: str, table_index: int = 0)
338
  except Exception as e:
339
  return f"Error scrape_wiki_table: {e}"
340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  # -------------------------------------------------------------------
342
  # 4) ENVUELTORES DE HERRAMIENTAS (FunctionTool)
343
  # -------------------------------------------------------------------
 
267
 
268
  def classify_botanical(items_list_str: str) -> str:
269
  """
270
+ Splits an input list of foods (English names) into botanical Vegetables,
271
+ Fruits, and Others, and returns the three groups as comma-separated lists.
272
+ Nothing that is a botanical fruit appears in the Vegetables list.
273
  """
274
+
275
+ # --- botanical criteria -------------------------------------------------
276
+ botanical_fruits = {
277
+ "tomato", "bell pepper", "pepper", "green beans", "beans", "zucchini",
278
+ "cucumber", "eggplant", "corn", "peas", "pea", "pumpkin", "squash",
279
+ "avocado"
 
 
 
 
 
 
 
 
 
280
  }
281
+
282
+ botanical_vegetables = {
283
+ "broccoli", "celery", "lettuce", "kale", "spinach", "sweet potatoes",
284
+ "sweet potato", "potato", "onion", "garlic", "carrot", "okra",
285
+ "cabbage", "cauliflower", "beet", "turnip", "parsnip", "leek"
286
  }
287
 
288
+ # -----------------------------------------------------------------------
289
+ raw_items = [token.strip().lower() for token in items_list_str.split(",")]
290
+
291
+ vegetables = []
292
+ fruits = []
293
+ others = []
294
 
295
+ for item in raw_items:
296
+ if item in botanical_vegetables and item not in botanical_fruits:
297
+ vegetables.append(item)
298
+ elif item in botanical_fruits:
299
+ fruits.append(item)
300
+ else:
301
+ others.append(item)
302
 
303
+ # De-duplicate and alphabetise
304
+ vegetables = sorted(set(vegetables))
305
+ fruits = sorted(set(fruits))
306
+ others = sorted(set(others))
307
 
308
  return (
309
+ f"Vegetables: {', '.join(vegetables)}\n"
310
+ f"Fruits: {', '.join(fruits)}\n"
311
+ f"Others: {', '.join(others)}"
312
+ )
313
 
314
+
315
+ def scrape_wikipedia_table_deprecated(page_title: str, section: str, table_index: int = 0) -> str:
316
  """
317
  Busca una sección en una página de Wikipedia y extrae la tabla indicada (por índice).
318
  Devuelve el CSV.
 
340
  except Exception as e:
341
  return f"Error scrape_wiki_table: {e}"
342
 
343
def scrape_wikipedia_table(page_title: str,
                           section: str | None = None,
                           table_index: int = 0) -> str:
    """Return one ``wikitable`` from an English Wikipedia page as Markdown.

    Args:
        page_title: Article title; spaces are converted to underscores and
            characters such as ``?`` or ``#`` are percent-encoded so they are
            not mistaken for a query string or fragment.
        section: Anchor id of a section heading. When given, only tables that
            appear after that heading in the document are considered (this
            includes tables of later sections). When ``None`` or empty, the
            whole page is searched. A heading written with spaces is also
            tried with underscores.
        table_index: Position of the wanted table among the candidates.
            Python-style negative indices are accepted.

    Returns:
        The selected table rendered as a Markdown table without the index
        column.

    Raises:
        ValueError: If the page cannot be fetched successfully, the section
            is not found, or ``table_index`` is out of range.
    """
    # Function-scope imports keep this block self-contained.
    from io import StringIO
    from urllib.parse import quote

    base_url = "https://en.wikipedia.org/wiki/"
    # "%" stays safe so titles that are already percent-encoded keep working.
    url = base_url + quote(page_title.replace(" ", "_"), safe="/:%")

    response = requests.get(url, timeout=15)
    if not response.ok:
        # Without this check an error page is parsed like an article and the
        # caller only sees a misleading "not found" / "out of range" message.
        raise ValueError(f"Could not fetch '{url}' (HTTP {response.status_code}).")
    soup = BeautifulSoup(response.text, "html.parser")

    # Locate candidate tables.
    if section:
        header = soup.find(id=section)
        if header is None and " " in section:
            # Heading anchors use underscores instead of spaces.
            header = soup.find(id=section.replace(" ", "_"))
        if header is None:
            raise ValueError(f"Section '{section}' not found.")
        tables = header.find_all_next("table", class_="wikitable")
    else:
        tables = soup.find_all("table", class_="wikitable")

    try:
        table = tables[table_index]
    except IndexError:
        # Covers an empty result as well as indices too large or too negative.
        raise ValueError(
            f"Table index {table_index} out of range (found {len(tables)})"
        ) from None

    # Literal HTML strings are deprecated as read_html input; wrap in StringIO.
    df = pd.read_html(StringIO(str(table)), flavor="bs4")[0]
    return df.to_markdown(index=False)
370
+
371
  # -------------------------------------------------------------------
372
  # 4) ENVUELTORES DE HERRAMIENTAS (FunctionTool)
373
  # -------------------------------------------------------------------