Crocolil committed on
Commit
238786b
·
verified ·
1 Parent(s): b9709b4

Upload folder using huggingface_hub

Browse files
utils/__pycache__/geo.cpython-313.pyc ADDED
Binary file (762 Bytes). View file
 
utils/__pycache__/geo.cpython-314.pyc ADDED
Binary file (810 Bytes). View file
 
utils/geo.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
def city_to_coordinates(city):
    """Geocode a place name to ``(latitude, longitude)`` using Nominatim.

    Args:
        city: Free-form place name, sent as the ``q`` search parameter.

    Returns:
        A ``(lat, lon)`` tuple of floats taken from the first search
        result, or ``None`` when ``city`` is blank or the search returns
        no results.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    if not city or not str(city).strip():
        # A blank query cannot match anything; skip the network round trip.
        return None

    url = "https://nominatim.openstreetmap.org/search"
    params = {
        "q": city,
        "format": "json",
        "limit": 1,
    }
    headers = {
        "User-Agent": "PlantWise",
    }

    # requests has no default timeout; without one an unresponsive server
    # would block the caller indefinitely.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    # Surface HTTP errors here rather than failing later while indexing an
    # error payload as if it were a result list.
    response.raise_for_status()
    data = response.json()

    if not data:
        return None

    first = data[0]
    return float(first["lat"]), float(first["lon"])
utils/taxinomy.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import growth database
2
+ import pandas as pd
3
+ from rapidfuzz import process
4
+ import requests
5
+
6
+ growth_df = pd.read_csv("../data/growth_csv/growth.csv")  # read once at import time; the relative path resolves against the current working directory
7
+
8
+
9
def get_common_names_gbif(scientific_name: str) -> list[str]:
    """Get the English common names for a scientific name via GBIF.

    Args:
        scientific_name: Scientific name passed to the GBIF species-match
            endpoint (non-strict matching).

    Returns:
        Lower-cased English vernacular names in the order GBIF returns
        them, with duplicates removed. Empty when ``scientific_name`` is
        blank or GBIF reports no ``usageKey`` for it.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
    """
    if not scientific_name or not scientific_name.strip():
        # Nothing to look up; skip both network round trips.
        return []

    # Step 1: get GBIF taxon key
    r = requests.get(
        "https://api.gbif.org/v1/species/match",
        params={"name": scientific_name, "strict": False},
        timeout=10,  # requests waits forever unless a timeout is given
    )
    r.raise_for_status()
    key = r.json().get("usageKey")
    if not key:
        return []

    # Step 2: get vernacular names
    r2 = requests.get(
        f"https://api.gbif.org/v1/species/{key}/vernacularNames",
        timeout=10,
    )
    r2.raise_for_status()
    names = r2.json().get("results", [])
    english = (
        n["vernacularName"].lower()
        for n in names
        # Skip entries without a usable name instead of raising KeyError.
        if n.get("language") == "eng" and n.get("vernacularName")
    )
    # dict.fromkeys drops repeated names while keeping first-seen order.
    return list(dict.fromkeys(english))
24
+
25
+ # identify scientific name with the common name in the growth database
26
def find_common_name_match(scientific_name: str) -> "str | None":
    """Find the growth-database plant name matching a scientific name.

    The English vernacular names returned by ``get_common_names_gbif`` are
    fuzzy-matched, in order, against the ``"Plant Name"`` column of
    ``growth_df``. Each vernacular name is first compared as a whole; if
    that fails, its individual words are tried one by one.

    Args:
        scientific_name: Scientific name to look up.

    Returns:
        The matching ``"Plant Name"`` value as stored in ``growth_df``, or
        ``None`` when ``scientific_name`` is blank or no candidate scores
        at least 80.
    """
    if not scientific_name or not scientific_name.strip():
        # Nothing to look up; avoid the GBIF round trips entirely.
        return None

    plant_names = growth_df["Plant Name"]

    def best_match(query):
        # The GBIF names arrive lower-cased, and RapidFuzz applies no
        # preprocessing by default, so lower-case both sides to keep the
        # comparison case-insensitive. The returned choice is still the
        # original database value.
        return process.extractOne(
            query, plant_names, processor=str.lower, score_cutoff=80
        )

    for name in get_common_names_gbif(scientific_name):
        # check if it matches even partially with the common name in the
        # growth database using fuzzy matching
        match = best_match(name)
        if match:
            return match[0]  # matched common name from the growth database

        # Whole-name match failed: fall back to the individual words.
        for word in name.split():
            print(f" Checking if '{word}' is in growth database common names...")
            match = best_match(word)
            if match:
                print(f" Found a match for '{word}': '{match[0]}' with score {match[1]}")
                return match[0]  # matched common name from the growth database

    return None
44
+
45
if __name__ == "__main__":
    # Manual smoke test. Guarded so that merely importing this module does
    # not call the GBIF API or write to stdout.
    print(find_common_name_match("Cirsium vulgare"))  # should return "Spear Thistle"
46
+
47
def get_growth_info(scientific_name: str) -> dict:
    """Return the growth-database row for a scientific name as a dict.

    The scientific name is resolved to a ``"Plant Name"`` value through
    ``find_common_name_match``; the first ``growth_df`` row carrying that
    name is converted to a dictionary. An empty dict is returned when no
    plant name could be matched.
    """
    matched_name = find_common_name_match(scientific_name)
    if matched_name:
        matching_rows = growth_df[growth_df["Plant Name"] == matched_name]
        first_row = matching_rows.iloc[0]
        return first_row.to_dict()
    return {}
utils/taxinomy_t.ipynb ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "7edbd867",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "['french lavender', 'french lavender', 'french lavender', 'french lavender', 'french lavender', 'italian lavender', 'italian lavender']\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "import requests\n",
19
+ "def get_common_names_gbif(scientific_name: str) -> list[str]:\n",
20
+ " \"\"\"Get all common names for a scientific name via GBIF.\"\"\"\n",
21
+ " # Step 1: get GBIF taxon key\n",
22
+ " r = requests.get(\n",
23
+ " \"https://api.gbif.org/v1/species/match\",\n",
24
+ " params={\"name\": scientific_name, \"strict\": False}\n",
25
+ " )\n",
26
+ " key = r.json().get(\"usageKey\")\n",
27
+ " if not key:\n",
28
+ " return []\n",
29
+ "\n",
30
+ " # Step 2: get vernacular names\n",
31
+ " r2 = requests.get(f\"https://api.gbif.org/v1/species/{key}/vernacularNames\")\n",
32
+ " names = r2.json().get(\"results\", [])\n",
33
+ " return [n[\"vernacularName\"].lower() for n in names if n.get(\"language\") == \"eng\"]\n",
34
+ "\n",
35
+ "# Example:\n",
36
+ "print(get_common_names_gbif(\"Circium vulgare\")) "
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": null,
42
+ "id": "67faa7dd",
43
+ "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "Lavender\n"
50
+ ]
51
+ }
52
+ ],
53
+ "source": [
54
+ "# import growth database \n",
55
+ "import pandas as pd\n",
56
+ "growth_df = pd.read_csv(\"../data/growth_csv/growth.csv\")\n",
57
+ "\n",
58
+ "from rapidfuzz import process\n",
59
+ "\n",
60
+ "# identify scientific name with the common name in the growth database\n",
61
+ "def find_common_name(scientific_name: str) -> str:\n",
62
+ " \"\"\"Find the common name for a scientific name in the growth database.\"\"\"\n",
63
+ " common_names = get_common_names_gbif(scientific_name)\n",
64
+ " for name in common_names:\n",
65
+ " \n",
66
+ " # check if it matches even partially with the common name in the growth database using fuzzy matching\n",
67
+ " match = process.extractOne(name, growth_df[\"Plant Name\"], score_cutoff=80)\n",
68
+ " if match:\n",
69
+ " return match[0] # return the matched common name from the growth database\n",
70
+ " else:\n",
71
+ " for word in name.split():\n",
72
+ " print(f\" Checking if '{word}' is in growth database common names...\")\n",
73
+ " match = process.extractOne(word, growth_df[\"Plant Name\"], score_cutoff=80)\n",
74
+ " if match:\n",
75
+ " print(f\" Found a match for '{word}': '{match[0]}' with score {match[1]}\")\n",
76
+ " return match[0] # return the matched common name from the growth database\n",
77
+ " \n",
78
+ " return None\n",
79
+ "\n",
80
+ "print(find_common_name(\"Circium vulgare\")) # should return \"Spear Thistle\"\n",
81
+ "\n",
82
+ "def get_growth_info(scientific_name: str) -> dict:\n",
83
+ " common_name = find_common_name(scientific_name)\n",
84
+ " if not common_name:\n",
85
+ " return {}\n",
86
+ " return growth_df[growth_df[\"Plant Name\"] == common_name].iloc[0].to_dict()\n",
87
+ "\n",
88
+ "# print(get_growth_info(\"Lavandula stoechas\"))"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": null,
94
+ "id": "4e41ad2a",
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "from geo import city_to_coordinates"
99
+ ]
100
+ }
101
+ ],
102
+ "metadata": {
103
+ "kernelspec": {
104
+ "display_name": "Python 3",
105
+ "language": "python",
106
+ "name": "python3"
107
+ },
108
+ "language_info": {
109
+ "codemirror_mode": {
110
+ "name": "ipython",
111
+ "version": 3
112
+ },
113
+ "file_extension": ".py",
114
+ "mimetype": "text/x-python",
115
+ "name": "python",
116
+ "nbconvert_exporter": "python",
117
+ "pygments_lexer": "ipython3",
118
+ "version": "3.14.5"
119
+ }
120
+ },
121
+ "nbformat": 4,
122
+ "nbformat_minor": 5
123
+ }