Guarden

Running

App Files Files Community

Crocolil committed 17 days ago

Commit

238786b

verified ·

1 Parent(s): b9709b4

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

utils/__pycache__/geo.cpython-313.pyc +0 -0
utils/__pycache__/geo.cpython-314.pyc +0 -0
utils/geo.py +21 -0
utils/taxinomy.py +51 -0
utils/taxinomy_t.ipynb +123 -0

utils/__pycache__/geo.cpython-313.pyc ADDED Viewed

Binary file (762 Bytes). View file

utils/__pycache__/geo.cpython-314.pyc ADDED Viewed

Binary file (810 Bytes). View file

utils/geo.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import requests
+def city_to_coordinates(city):
+    url = "https://nominatim.openstreetmap.org/search"
+    params = {
+        "q": city,
+        "format": "json",
+        "limit": 1
+    }
+    headers = {
+        "User-Agent": "PlantWise"
+    }
+    response = requests.get(url, params=params, headers=headers)
+    data = response.json()
+    if not data:
+        return None
+    return float(data[0]["lat"]), float(data[0]["lon"])

utils/taxinomy.py ADDED Viewed

	@@ -0,0 +1,51 @@

+# import growth database
+import pandas as pd
+from rapidfuzz import process
+import requests
+# Growth database, loaded once when this module is imported. The functions
+# below look plants up through its "Plant Name" column.
+# NOTE(review): the path is relative, so it is resolved against the current
+# working directory rather than this file's location - confirm the intended
+# launch directory.
+growth_df = pd.read_csv("../data/growth_csv/growth.csv")
+def get_common_names_gbif(scientific_name: str) -> list[str]:
+    """Get all common names for a scientific name via GBIF."""
+    # Step 1: get GBIF taxon key
+    r = requests.get(
+        "https://api.gbif.org/v1/species/match",
+        params={"name": scientific_name, "strict": False}
+    )
+    key = r.json().get("usageKey")
+    if not key:
+        return []
+    # Step 2: get vernacular names
+    r2 = requests.get(f"https://api.gbif.org/v1/species/{key}/vernacularNames")
+    names = r2.json().get("results", [])
+    return [n["vernacularName"].lower() for n in names if n.get("language") == "eng"]
+# identify scientific name with the common name in the growth database
+def find_common_name_match(scientific_name: str) -> str:
+    """Find the common name for a scientific name in the growth database."""
+    common_names = get_common_names_gbif(scientific_name)
+    for name in common_names:
+        # check if it matches even partially with the common name in the growth database using fuzzy matching
+        match = process.extractOne(name, growth_df["Plant Name"], score_cutoff=80)
+        if match:
+            return match[0]  # return the matched common name from the growth database
+        else:
+            for word in name.split():
+                print(f"  Checking if '{word}' is in growth database common names...")
+                match = process.extractOne(word, growth_df["Plant Name"], score_cutoff=80)
+                if match:
+                    print(f"    Found a match for '{word}': '{match[0]}' with score {match[1]}")
+                    return match[0]  # return the matched common name from the growth database
+    return None
+print(find_common_name_match("Circium vulgare"))  # should return "Spear Thistle"
+def get_growth_info(scientific_name: str) -> dict:
+    common_name = find_common_name_match(scientific_name)
+    if not common_name:
+        return {}
+    return growth_df[growth_df["Plant Name"] == common_name].iloc[0].to_dict()

utils/taxinomy_t.ipynb ADDED Viewed

	@@ -0,0 +1,123 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7edbd867",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['french lavender', 'french lavender', 'french lavender', 'french lavender', 'french lavender', 'italian lavender', 'italian lavender']\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "def get_common_names_gbif(scientific_name: str) -> list[str]:\n",
+    "    \"\"\"Get all common names for a scientific name via GBIF.\"\"\"\n",
+    "    # Step 1: get GBIF taxon key\n",
+    "    r = requests.get(\n",
+    "        \"https://api.gbif.org/v1/species/match\",\n",
+    "        params={\"name\": scientific_name, \"strict\": False}\n",
+    "    )\n",
+    "    key = r.json().get(\"usageKey\")\n",
+    "    if not key:\n",
+    "        return []\n",
+    "\n",
+    "    # Step 2: get vernacular names\n",
+    "    r2 = requests.get(f\"https://api.gbif.org/v1/species/{key}/vernacularNames\")\n",
+    "    names = r2.json().get(\"results\", [])\n",
+    "    return [n[\"vernacularName\"].lower() for n in names if n.get(\"language\") == \"eng\"]\n",
+    "\n",
+    "# Example:\n",
+    "print(get_common_names_gbif(\"Circium vulgare\")) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "67faa7dd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Lavender\n"
+     ]
+    }
+   ],
+   "source": [
+    "# import growth database \n",
+    "import pandas as pd\n",
+    "growth_df = pd.read_csv(\"../data/growth_csv/growth.csv\")\n",
+    "\n",
+    "from rapidfuzz import process\n",
+    "\n",
+    "# identify scientific name with the common name in the growth database\n",
+    "def find_common_name(scientific_name: str) -> str:\n",
+    "    \"\"\"Find the common name for a scientific name in the growth database.\"\"\"\n",
+    "    common_names = get_common_names_gbif(scientific_name)\n",
+    "    for name in common_names:\n",
+    "        \n",
+    "        # check if it matches even partially with the common name in the growth database using fuzzy matching\n",
+    "        match = process.extractOne(name, growth_df[\"Plant Name\"], score_cutoff=80)\n",
+    "        if match:\n",
+    "            return match[0]  # return the matched common name from the growth database\n",
+    "        else:\n",
+    "            for word in name.split():\n",
+    "                print(f\"  Checking if '{word}' is in growth database common names...\")\n",
+    "                match = process.extractOne(word, growth_df[\"Plant Name\"], score_cutoff=80)\n",
+    "                if match:\n",
+    "                    print(f\"    Found a match for '{word}': '{match[0]}' with score {match[1]}\")\n",
+    "                    return match[0]  # return the matched common name from the growth database\n",
+    "        \n",
+    "    return None\n",
+    "\n",
+    "print(find_common_name(\"Circium vulgare\"))  # should return \"Spear Thistle\"\n",
+    "\n",
+    "def get_growth_info(scientific_name: str) -> dict:\n",
+    "    common_name = find_common_name(scientific_name)\n",
+    "    if not common_name:\n",
+    "        return {}\n",
+    "    return growth_df[growth_df[\"Plant Name\"] == common_name].iloc[0].to_dict()\n",
+    "\n",
+    "# print(get_growth_info(\"Lavandula stoechas\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4e41ad2a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from geo import city_to_coordinates"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.14.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}