outshine84 committed on
Commit
4f12e6d
·
1 Parent(s): e89d6e2
.gitattributes CHANGED
@@ -36,3 +36,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  *.sqlite3 filter=lfs diff=lfs merge=lfs -text
37
  *.faiss filter=lfs diff=lfs merge=lfs -text
38
  *.db filter=lfs diff=lfs merge=lfs -text
 
 
 
36
  *.sqlite3 filter=lfs diff=lfs merge=lfs -text
37
  *.faiss filter=lfs diff=lfs merge=lfs -text
38
  *.db filter=lfs diff=lfs merge=lfs -text
39
+ data/leafsnap_*.faiss filter=lfs diff=lfs merge=lfs -text
40
+ data/leafsnap_*.pt filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -7,4 +7,5 @@ client_secret*
7
  pwa-app/.env
8
  missing_species_alias.csv
9
  unique_species_labels.csv
10
- data/images/
 
 
7
  pwa-app/.env
8
  missing_species_alias.csv
9
  unique_species_labels.csv
10
+ data/images/
11
+ missing_species.csv
api.py CHANGED
@@ -28,6 +28,8 @@ load_dotenv()
28
  INDEX_PATH = os.getenv("PLANCLEF_INDEX_PATH", "data/planclef.faiss")
29
  CACHE_PATH = os.getenv("PLANCLEF_CACHE_PATH", "data/planclef_cache.pt")
30
  MODEL_NAME = os.getenv("PLANCLEF_MODEL_NAME", "ViT-B-32")
 
 
31
  RAG_DB_PATH = os.getenv("RAG_DB_PATH", "data/plant_rag")
32
  WIKI_USER_AGENT = os.getenv(
33
  "WIKI_USER_AGENT",
@@ -170,6 +172,20 @@ def _species_to_folder_name(species_name: str) -> str:
170
 
171
 
172
  def _get_species_preview_image_url(species_name: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  try:
174
  collection = get_rag_collection()
175
  res = collection.get(
@@ -265,9 +281,37 @@ def get_plants_db_connection() -> sqlite3.Connection:
265
 
266
  conn = sqlite3.connect(db_path)
267
  conn.row_factory = sqlite3.Row
 
 
 
 
 
268
  return conn
269
 
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  def _sqlite_table_exists(conn: sqlite3.Connection, table_name: str) -> bool:
272
  row = conn.execute(
273
  "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ? LIMIT 1",
@@ -790,10 +834,23 @@ def get_index():
790
  try:
791
  from plentclef import PlentClefIndex
792
 
 
 
 
 
 
 
 
 
 
 
793
  index = PlentClefIndex(
794
  model_name=MODEL_NAME,
795
  index_path=INDEX_PATH,
796
  index_cache=CACHE_PATH,
 
 
 
797
  )
798
  except Exception as e:
799
  cause = f"{type(e).__name__}: {e}"
@@ -1179,12 +1236,14 @@ def plant_info(
1179
 
1180
  title = first_meta.get("species_name", name)
1181
  common_name = first_meta.get("common_name", "")
1182
- image_paths_json = first_meta.get("image_paths", "[]")
1183
-
1184
- try:
1185
- image_paths = json.loads(image_paths_json)
1186
- except (json.JSONDecodeError, TypeError):
1187
- image_paths = []
 
 
1188
 
1189
  # Combine chunks for OpenAI context (up to 6000 chars)
1190
  documents = results.get("documents", [])
 
28
  INDEX_PATH = os.getenv("PLANCLEF_INDEX_PATH", "data/planclef.faiss")
29
  CACHE_PATH = os.getenv("PLANCLEF_CACHE_PATH", "data/planclef_cache.pt")
30
  MODEL_NAME = os.getenv("PLANCLEF_MODEL_NAME", "ViT-B-32")
31
+ LEAFSNAP_INDEX_PATH = os.getenv("LEAFSNAP_INDEX_PATH", "data/leafsnap_field.faiss")
32
+ LEAFSNAP_CACHE_PATH = os.getenv("LEAFSNAP_CACHE_PATH", "data/leafsnap_cache.pt")
33
  RAG_DB_PATH = os.getenv("RAG_DB_PATH", "data/plant_rag")
34
  WIKI_USER_AGENT = os.getenv(
35
  "WIKI_USER_AGENT",
 
172
 
173
 
174
  def _get_species_preview_image_url(species_name: str) -> str:
175
+ image_paths = _get_species_images_from_db(species_name)
176
+ for raw_path in image_paths:
177
+ if isinstance(raw_path, str) and raw_path.startswith(("http://", "https://")):
178
+ return raw_path
179
+
180
+ normalized_path = _normalize_image_path(str(raw_path or ""))
181
+ if not normalized_path:
182
+ continue
183
+
184
+ local_path = Path("data") / "images" / normalized_path
185
+ if local_path.exists():
186
+ return f"/images/{normalized_path}"
187
+
188
+ # Backward compatibility: read from legacy RAG metadata if DB is empty.
189
  try:
190
  collection = get_rag_collection()
191
  res = collection.get(
 
281
 
282
  conn = sqlite3.connect(db_path)
283
  conn.row_factory = sqlite3.Row
284
+ try:
285
+ conn.execute("ALTER TABLE plants ADD COLUMN image_paths TEXT")
286
+ conn.commit()
287
+ except Exception:
288
+ pass
289
  return conn
290
 
291
 
292
def _get_species_images_from_db(species_name: str) -> list[str]:
    """Return the image paths stored for *species_name* in the plants table.

    The species is matched case-insensitively against ``plants.species_name``
    and the ``image_paths`` column is decoded as a JSON list.

    Args:
        species_name: Species to look up; surrounding whitespace is ignored.

    Returns:
        The non-empty entries of the stored list, stringified and stripped.
        An empty list is returned when the species is unknown, the column is
        missing or empty, or the stored value is not a JSON list.
    """
    query = "SELECT image_paths FROM plants WHERE lower(species_name) = lower(?) LIMIT 1"

    # ``with conn:`` on a sqlite3 connection only commits/rolls back; it does
    # not close the connection, so close it explicitly to avoid leaking a
    # handle on every lookup.
    conn = get_plants_db_connection()
    try:
        row = conn.execute(query, (species_name.strip(),)).fetchone()
    finally:
        conn.close()

    if row is None:
        return []

    raw = row["image_paths"] if "image_paths" in row.keys() else None
    if not raw:
        return []

    try:
        parsed = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return []

    if not isinstance(parsed, list):
        return []

    cleaned: list[str] = []
    for entry in parsed:
        # A JSON null would otherwise be stringified into the bogus path "None".
        if entry is None:
            continue
        text = str(entry).strip()
        if text:
            cleaned.append(text)
    return cleaned
313
+
314
+
315
  def _sqlite_table_exists(conn: sqlite3.Connection, table_name: str) -> bool:
316
  row = conn.execute(
317
  "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ? LIMIT 1",
 
834
  try:
835
  from plentclef import PlentClefIndex
836
 
837
+ leafsnap_aliases: dict[str, str] = {}
838
+ try:
839
+ with sqlite3.connect(PLANTS_SQLITE_PATH) as _conn:
840
+ rows = _conn.execute(
841
+ "SELECT leafsnap_label, db_species_name FROM leafsnap_aliases"
842
+ ).fetchall()
843
+ leafsnap_aliases = {r[0]: r[1] for r in rows}
844
+ except Exception:
845
+ pass # table may not exist yet; aliases simply won't be applied
846
+
847
  index = PlentClefIndex(
848
  model_name=MODEL_NAME,
849
  index_path=INDEX_PATH,
850
  index_cache=CACHE_PATH,
851
+ leafsnap_index_path=LEAFSNAP_INDEX_PATH,
852
+ leafsnap_cache_path=LEAFSNAP_CACHE_PATH,
853
+ leafsnap_aliases=leafsnap_aliases,
854
  )
855
  except Exception as e:
856
  cause = f"{type(e).__name__}: {e}"
 
1236
 
1237
  title = first_meta.get("species_name", name)
1238
  common_name = first_meta.get("common_name", "")
1239
+ image_paths = _get_species_images_from_db(name)
1240
+ if not image_paths:
1241
+ # Backward compatibility with old RAG metadata layout.
1242
+ image_paths_json = first_meta.get("image_paths", "[]")
1243
+ try:
1244
+ image_paths = json.loads(image_paths_json)
1245
+ except (json.JSONDecodeError, TypeError):
1246
+ image_paths = []
1247
 
1248
  # Combine chunks for OpenAI context (up to 6000 chars)
1249
  documents = results.get("documents", [])
build_plant_rag.py CHANGED
@@ -13,7 +13,8 @@ Notes
13
  - Tries Italian Wikipedia first, falls back to English.
14
  - Skips sections: Note, Bibliografia, Voci correlate, Altri progetti,
15
  Collegamenti esterni (and their English equivalents).
16
- - Metadata per chunk: species_name, common_name, image_paths (JSON list of URLs), lang.
 
17
  """
18
 
19
  import csv
@@ -25,6 +26,7 @@ import sqlite3
25
  import sys
26
  import time
27
  import urllib.parse
 
28
  from pathlib import Path
29
  from typing import Optional
30
 
@@ -407,6 +409,46 @@ def collect_images(species_name: str, lang: str) -> list[str]:
407
  return fetch_wiki_image_urls(species_name, lang)[:MAX_IMAGES]
408
 
409
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  # ---------------------------------------------------------------------------
411
  # Core processing
412
  # ---------------------------------------------------------------------------
@@ -419,6 +461,7 @@ def process_species(
419
  translator_client: OpenAI | None,
420
  translation_model: str,
421
  translate_non_italian: bool,
 
422
  ) -> dict:
423
  slug = slugify(species_name)
424
 
@@ -481,6 +524,7 @@ def process_species(
481
 
482
  # --- Images ---
483
  image_paths = collect_images(resolved_title, lang)
 
484
 
485
  # --- Chunk & upsert into ChromaDB ---
486
  chunks = chunk_by_words(full_text)
@@ -492,7 +536,6 @@ def process_species(
492
  {
493
  "species_name": species_name,
494
  "common_name": common_name,
495
- "image_paths": json.dumps(image_paths),
496
  "chunk_index": i,
497
  "lang": lang,
498
  "source_lang": lang,
@@ -643,6 +686,7 @@ def main() -> None:
643
  translator_client=translator_client,
644
  translation_model=args.translation_model,
645
  translate_non_italian=args.translate_non_italian,
 
646
  )
647
  progress[species] = result
648
  except Exception as exc:
@@ -669,7 +713,7 @@ def main() -> None:
669
  print(f" Errors : {errors}")
670
  print(f" Total docs : {collection.count()}")
671
  print(f" ChromaDB : {RAG_DIR}")
672
- print(" Images : stored as remote URLs in Chroma metadata")
673
 
674
 
675
  if __name__ == "__main__":
 
13
  - Tries Italian Wikipedia first, falls back to English.
14
  - Skips sections: Note, Bibliografia, Voci correlate, Altri progetti,
15
  Collegamenti esterni (and their English equivalents).
16
+ - Metadata per chunk: species_name, common_name, lang.
17
+ - Image paths per species are stored in plants.db (plants.image_paths JSON).
18
  """
19
 
20
  import csv
 
26
  import sys
27
  import time
28
  import urllib.parse
29
+ from datetime import datetime, timezone
30
  from pathlib import Path
31
  from typing import Optional
32
 
 
409
  return fetch_wiki_image_urls(species_name, lang)[:MAX_IMAGES]
410
 
411
 
412
def save_species_images_to_sqlite(sqlite_path: Path, species_name: str, image_paths: list[str]) -> None:
    """Store per-species image paths in plants.db for API/UI consumption.

    The list is written as a JSON string to ``plants.image_paths``. An existing
    row for the species keeps all its other columns; only ``image_paths`` and
    ``updated_at`` are overwritten. A missing row is inserted.

    The function is a no-op when the database file or the ``plants`` table
    does not exist: this helper never creates the database itself.

    Args:
        sqlite_path: Location of the SQLite database file.
        species_name: Value matched against the unique ``plants.species_name``.
        image_paths: Image paths/URLs to persist for the species.

    Raises:
        sqlite3.Error: If the schema migration or the upsert fails.
    """
    # Bail out before doing any work (or creating stray directories) when
    # there is no database to update.
    if not sqlite_path.exists():
        return

    payload = json.dumps(image_paths, ensure_ascii=False)
    now_iso = datetime.now(timezone.utc).isoformat()

    conn = sqlite3.connect(sqlite_path)
    try:
        has_plants_table = conn.execute(
            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='plants' LIMIT 1"
        ).fetchone()
        if has_plants_table is None:
            return

        # Migrate databases created before image_paths existed. Checking the
        # schema (instead of swallowing every exception from ALTER TABLE) lets
        # genuine failures such as a locked or read-only database surface.
        existing_columns = {info[1] for info in conn.execute("PRAGMA table_info(plants)")}
        if "image_paths" not in existing_columns:
            conn.execute("ALTER TABLE plants ADD COLUMN image_paths TEXT")

        conn.execute(
            """
            INSERT INTO plants (species_name, image_paths, updated_at)
            VALUES (?, ?, ?)
            ON CONFLICT(species_name) DO UPDATE SET
                image_paths=excluded.image_paths,
                updated_at=excluded.updated_at
            """,
            (species_name, payload, now_iso),
        )
        conn.commit()
    finally:
        conn.close()
450
+
451
+
452
  # ---------------------------------------------------------------------------
453
  # Core processing
454
  # ---------------------------------------------------------------------------
 
461
  translator_client: OpenAI | None,
462
  translation_model: str,
463
  translate_non_italian: bool,
464
+ sqlite_path: Path,
465
  ) -> dict:
466
  slug = slugify(species_name)
467
 
 
524
 
525
  # --- Images ---
526
  image_paths = collect_images(resolved_title, lang)
527
+ save_species_images_to_sqlite(sqlite_path, species_name, image_paths)
528
 
529
  # --- Chunk & upsert into ChromaDB ---
530
  chunks = chunk_by_words(full_text)
 
536
  {
537
  "species_name": species_name,
538
  "common_name": common_name,
 
539
  "chunk_index": i,
540
  "lang": lang,
541
  "source_lang": lang,
 
686
  translator_client=translator_client,
687
  translation_model=args.translation_model,
688
  translate_non_italian=args.translate_non_italian,
689
+ sqlite_path=Path(args.sqlite_path),
690
  )
691
  progress[species] = result
692
  except Exception as exc:
 
713
  print(f" Errors : {errors}")
714
  print(f" Total docs : {collection.count()}")
715
  print(f" ChromaDB : {RAG_DIR}")
716
+ print(" Images : stored in plants.db (plants.image_paths JSON)")
717
 
718
 
719
  if __name__ == "__main__":
build_plants_sqlite.py CHANGED
@@ -58,6 +58,7 @@ def init_db(conn: sqlite3.Connection) -> None:
58
  id INTEGER PRIMARY KEY AUTOINCREMENT,
59
  species_name TEXT NOT NULL UNIQUE,
60
  indexed INTEGER NOT NULL DEFAULT 0,
 
61
  annaffiatura_gg INTEGER,
62
  annaffiatura_time TEXT,
63
  luce TEXT,
@@ -72,6 +73,20 @@ def init_db(conn: sqlite3.Connection) -> None:
72
  )
73
  """
74
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  conn.commit()
76
 
77
 
@@ -324,6 +339,7 @@ def upsert_plant(
324
  species_name: str,
325
  indexed: bool,
326
  profile: dict | None,
 
327
  ) -> None:
328
  now_iso = datetime.now(timezone.utc).isoformat()
329
  profile = profile or {}
@@ -333,6 +349,7 @@ def upsert_plant(
333
  INSERT INTO plants (
334
  species_name,
335
  indexed,
 
336
  annaffiatura_gg,
337
  annaffiatura_time,
338
  luce,
@@ -345,9 +362,10 @@ def upsert_plant(
345
  prevenzione,
346
  updated_at
347
  )
348
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
349
  ON CONFLICT(species_name) DO UPDATE SET
350
  indexed=excluded.indexed,
 
351
  annaffiatura_gg=excluded.annaffiatura_gg,
352
  annaffiatura_time=excluded.annaffiatura_time,
353
  luce=excluded.luce,
@@ -363,6 +381,7 @@ def upsert_plant(
363
  (
364
  species_name,
365
  1 if indexed else 0,
 
366
  profile.get("annaffiatura_gg"),
367
  profile.get("annaffiatura_time"),
368
  profile.get("luce"),
 
58
  id INTEGER PRIMARY KEY AUTOINCREMENT,
59
  species_name TEXT NOT NULL UNIQUE,
60
  indexed INTEGER NOT NULL DEFAULT 0,
61
+ image_paths TEXT,
62
  annaffiatura_gg INTEGER,
63
  annaffiatura_time TEXT,
64
  luce TEXT,
 
73
  )
74
  """
75
  )
76
+ # Migration for existing DBs created before image_paths support.
77
+ try:
78
+ conn.execute("ALTER TABLE plants ADD COLUMN image_paths TEXT")
79
+ conn.commit()
80
+ except Exception:
81
+ pass
82
+ conn.execute(
83
+ """
84
+ CREATE TABLE IF NOT EXISTS leafsnap_aliases (
85
+ leafsnap_label TEXT PRIMARY KEY,
86
+ db_species_name TEXT NOT NULL
87
+ )
88
+ """
89
+ )
90
  conn.commit()
91
 
92
 
 
339
  species_name: str,
340
  indexed: bool,
341
  profile: dict | None,
342
+ image_paths: str | None = None,
343
  ) -> None:
344
  now_iso = datetime.now(timezone.utc).isoformat()
345
  profile = profile or {}
 
349
  INSERT INTO plants (
350
  species_name,
351
  indexed,
352
+ image_paths,
353
  annaffiatura_gg,
354
  annaffiatura_time,
355
  luce,
 
362
  prevenzione,
363
  updated_at
364
  )
365
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
366
  ON CONFLICT(species_name) DO UPDATE SET
367
  indexed=excluded.indexed,
368
+ image_paths=COALESCE(excluded.image_paths, plants.image_paths),
369
  annaffiatura_gg=excluded.annaffiatura_gg,
370
  annaffiatura_time=excluded.annaffiatura_time,
371
  luce=excluded.luce,
 
381
  (
382
  species_name,
383
  1 if indexed else 0,
384
+ image_paths,
385
  profile.get("annaffiatura_gg"),
386
  profile.get("annaffiatura_time"),
387
  profile.get("luce"),
data/plants.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b8e1d245671aa0b28d4f00e9b8ff41dd709e81de9110935124a6526ff24a0be
3
- size 294912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be66035a5e96a147ec7c9859bdf91e40c8fe9e13b383d76b858b2c2a39b0f34e
3
+ size 2465792
plentclef.py CHANGED
@@ -27,8 +27,28 @@ def _load_plantclef_cache(cache_path):
27
  with open(cache_path, "rb") as f:
28
  return pickle.load(f)
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  class PlentClefIndex():
31
- def __init__(self, model_name, index_path,index_cache):
 
 
32
  self.model, self.preprocess, self.tokenizer = open_clip.create_model_and_transforms(
33
  model_name=model_name,
34
  pretrained="laion2b_s34b_b79k"
@@ -43,29 +63,48 @@ class PlentClefIndex():
43
  raise KeyError("Missing 'labels' in PlantCLEF cache")
44
  self.plantclef_labels = data["labels"]
45
 
46
- def embed_image(self,path):
47
- img = self.preprocess(Image.open(path).convert("RGB")).unsqueeze(0)# Move image to the same device as the model
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  with torch.no_grad():
49
  e = self.model.encode_image(img)
50
  e = e / e.norm(dim=-1, keepdim=True)
51
  return e.cpu().numpy().astype("float32")
52
 
53
- def search(self,path, labels, k=5):
54
- q = self.embed_image(path)
55
- sims, idxs = self.index.search(q, k) # [1, k]
56
-
57
- aggregated_results = defaultdict(lambda: {'score_sum': 0.0, 'image_paths': []})
58
-
59
  for score, idx in zip(sims[0], idxs[0]):
60
  species_label = labels[idx]
61
- aggregated_results[species_label]['score_sum'] += score # Append the image path
 
 
 
 
 
 
 
 
 
 
62
 
63
- # Convert aggregated results to a list of (category, total_score, image_paths_list) tuples
64
- final_results = []
65
- for category, data in aggregated_results.items():
66
- final_results.append((category, data['score_sum'], data['image_paths']))
67
 
68
- # Sort by total score in descending order
69
- final_results.sort(key=lambda x: x[1], reverse=True)
 
70
 
71
- return final_results
 
27
  with open(cache_path, "rb") as f:
28
  return pickle.load(f)
29
 
30
+ def _rrf_merge(results_list: list[list], k: int = 60) -> list[tuple]:
31
+ """Reciprocal Rank Fusion across multiple ranked result lists.
32
+
33
+ Each element of *results_list* is a list of (species, score, image_paths)
34
+ tuples already sorted by descending score. Returns a merged list sorted
35
+ by descending RRF score, same tuple format (image_paths will be empty).
36
+ """
37
+ combined: dict[str, float] = defaultdict(float)
38
+ for ranked in results_list:
39
+ for rank, (species, _score, _paths) in enumerate(ranked):
40
+ combined[species] += 1.0 / (k + rank + 1)
41
+ return sorted(
42
+ [(species, rrf_score, []) for species, rrf_score in combined.items()],
43
+ key=lambda x: x[1],
44
+ reverse=True,
45
+ )
46
+
47
+
48
  class PlentClefIndex():
49
+ def __init__(self, model_name, index_path, index_cache,
50
+ leafsnap_index_path=None, leafsnap_cache_path=None,
51
+ leafsnap_aliases: dict | None = None):
52
  self.model, self.preprocess, self.tokenizer = open_clip.create_model_and_transforms(
53
  model_name=model_name,
54
  pretrained="laion2b_s34b_b79k"
 
63
  raise KeyError("Missing 'labels' in PlantCLEF cache")
64
  self.plantclef_labels = data["labels"]
65
 
66
+ # Optional LeafSnap index (leaf-only images, same embedding space)
67
+ self.leafsnap_index = None
68
+ self.leafsnap_labels: list = []
69
+ if leafsnap_index_path and os.path.exists(leafsnap_index_path):
70
+ self.leafsnap_index = faiss.read_index(leafsnap_index_path)
71
+ if leafsnap_cache_path and os.path.exists(leafsnap_cache_path):
72
+ ls_data = _load_plantclef_cache(leafsnap_cache_path)
73
+ if not isinstance(ls_data, dict) or "labels" not in ls_data:
74
+ raise KeyError("Missing 'labels' in LeafSnap cache")
75
+ self.leafsnap_labels = ls_data["labels"]
76
+ # Dict mapping LeafSnap label -> canonical DB species name
77
+ self.leafsnap_aliases: dict[str, str] = leafsnap_aliases or {}
78
+
79
def embed_image(self, path):
    """Return the L2-normalised embedding of the image at *path*.

    The image is converted to RGB, run through ``self.preprocess`` and
    encoded by ``self.model`` with gradients disabled.

    Args:
        path: Image location accepted by ``PIL.Image.open``.

    Returns:
        A float32 NumPy array holding the normalised embedding
        (presumably shaped ``(1, dim)`` given the added batch axis — confirm
        against the model).
    """
    # Use the context manager so the file handle is released deterministically;
    # convert() returns an independent, fully loaded copy.
    with Image.open(path) as source:
        rgb = source.convert("RGB")

    img = self.preprocess(rgb).unsqueeze(0)
    with torch.no_grad():
        e = self.model.encode_image(img)
        e = e / e.norm(dim=-1, keepdim=True)
    return e.cpu().numpy().astype("float32")
85
 
86
+ def _search_index(self, q, index, labels, k):
87
+ """Search a single FAISS index and aggregate scores by species."""
88
+ sims, idxs = index.search(q, k)
89
+ aggregated: dict = defaultdict(lambda: {'score_sum': 0.0, 'image_paths': []})
 
 
90
  for score, idx in zip(sims[0], idxs[0]):
91
  species_label = labels[idx]
92
+ species_label = self.leafsnap_aliases.get(species_label, species_label)
93
+ aggregated[species_label]['score_sum'] += score
94
+ results = [
95
+ (cat, d['score_sum'], d['image_paths'])
96
+ for cat, d in aggregated.items()
97
+ ]
98
+ results.sort(key=lambda x: x[1], reverse=True)
99
+ return results
100
+
101
def search(self, path, labels, k=5):
    """Rank species for the image at *path*.

    The image is embedded once and matched against ``self.index`` using
    *labels*. When a LeafSnap index and its labels are loaded, that index is
    queried with the same embedding and both rankings are fused with
    ``_rrf_merge``; otherwise the primary ranking is returned as is.

    Returns:
        At most *k* ``(species, score, image_paths)`` tuples, best first.
    """
    query = self.embed_image(path)
    primary = self._search_index(query, self.index, labels, k)

    # Guard clause: no usable secondary index, so skip the fusion step.
    if self.leafsnap_index is None or not self.leafsnap_labels:
        return primary[:k]

    secondary = self._search_index(
        query, self.leafsnap_index, self.leafsnap_labels, k
    )
    fused = _rrf_merge([primary, secondary])
    return fused[:k]
pwa-app/src/App.jsx CHANGED
@@ -16,6 +16,7 @@ import {
16
  updateMyPlantFirstWaterDate,
17
  uploadMyPlantPhoto,
18
  verifyGoogleToken,
 
19
  toAbsoluteImage,
20
  toOptimizedImage
21
  } from "./api";
@@ -111,6 +112,21 @@ function formatISOToInputDate(value) {
111
  return `${year}-${month}-${day}`;
112
  }
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  export default function App({ googleClientIdConfigured = false }) {
115
  const [auth, setAuth] = useState(null);
116
  const [authBusy, setAuthBusy] = useState(false);
@@ -149,6 +165,7 @@ export default function App({ googleClientIdConfigured = false }) {
149
  const [uploadingPhotoId, setUploadingPhotoId] = useState(null);
150
  const plantPhotoInputRef = useRef(null);
151
  const plantPhotoTargetIdRef = useRef(null);
 
152
 
153
  const [busy, setBusy] = useState({
154
  search: false,
@@ -260,6 +277,83 @@ export default function App({ googleClientIdConfigured = false }) {
260
  .filter((entry) => entry.value !== null && entry.value !== "");
261
  }, [myPlantProfile]);
262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  useEffect(() => {
264
  const raw = window.localStorage.getItem(AUTH_STORAGE_KEY);
265
  if (!raw) {
@@ -281,6 +375,37 @@ export default function App({ googleClientIdConfigured = false }) {
281
  }
282
  }, []);
283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  useEffect(() => {
285
  setExpandedProfileKey("");
286
  }, [plantProfile]);
@@ -550,7 +675,25 @@ export default function App({ googleClientIdConfigured = false }) {
550
  try {
551
  const data = await saveMyPlant(selectedSpecies, trimmed);
552
  const saved = data.saved || null;
553
- setSaveStatus(saved ? `Salvata: ${saved.user_given_name}` : "Pianta salvata.");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  setUserPlantName("");
555
  await loadMyPlants();
556
  } catch (err) {
 
16
  updateMyPlantFirstWaterDate,
17
  uploadMyPlantPhoto,
18
  verifyGoogleToken,
19
+ setUnauthorizedHandler,
20
  toAbsoluteImage,
21
  toOptimizedImage
22
  } from "./api";
 
112
  return `${year}-${month}-${day}`;
113
  }
114
 
115
/**
 * Decode the payload (second segment) of a JWT without verifying it.
 *
 * The segment is converted from base64url to padded base64, decoded to
 * bytes, interpreted as UTF-8 and parsed as JSON.
 *
 * @param {string} token - Compact JWT ("header.payload.signature").
 * @returns {object|null} The parsed claims, or null when the token is
 *   missing, has fewer than two segments, or cannot be decoded/parsed.
 */
function parseJwtPayload(token) {
  try {
    const segments = String(token || "").split(".");
    if (segments.length < 2) {
      return null;
    }

    const base64 = segments[1].replace(/-/g, "+").replace(/_/g, "/");
    const padded = base64 + "=".repeat((4 - (base64.length % 4)) % 4);

    // atob() yields one char per byte; parsing that directly would garble
    // non-ASCII claims (e.g. accented names), so decode the bytes as UTF-8.
    const binary = atob(padded);
    const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
    const json = new TextDecoder("utf-8").decode(bytes);

    return JSON.parse(json);
  } catch {
    return null;
  }
}
129
+
130
  export default function App({ googleClientIdConfigured = false }) {
131
  const [auth, setAuth] = useState(null);
132
  const [authBusy, setAuthBusy] = useState(false);
 
165
  const [uploadingPhotoId, setUploadingPhotoId] = useState(null);
166
  const plantPhotoInputRef = useRef(null);
167
  const plantPhotoTargetIdRef = useRef(null);
168
+ const refreshPromiseRef = useRef(null);
169
 
170
  const [busy, setBusy] = useState({
171
  search: false,
 
277
  .filter((entry) => entry.value !== null && entry.value !== "");
278
  }, [myPlantProfile]);
279
 
280
+ async function refreshGoogleSessionSilently() {
281
+ if (refreshPromiseRef.current) {
282
+ return refreshPromiseRef.current;
283
+ }
284
+
285
+ const currentToken = auth?.idToken || "";
286
+ const payload = parseJwtPayload(currentToken);
287
+ const clientId = String(payload?.aud || "").trim();
288
+
289
+ if (!clientId || !window.google?.accounts?.id) {
290
+ return false;
291
+ }
292
+
293
+ const refreshPromise = new Promise((resolve) => {
294
+ let done = false;
295
+ const finish = (ok) => {
296
+ if (done) {
297
+ return;
298
+ }
299
+ done = true;
300
+ resolve(ok);
301
+ };
302
+
303
+ const timeoutId = window.setTimeout(() => finish(false), 8000);
304
+
305
+ try {
306
+ window.google.accounts.id.initialize({
307
+ client_id: clientId,
308
+ auto_select: true,
309
+ cancel_on_tap_outside: false,
310
+ callback: async (credentialResponse) => {
311
+ const idToken = credentialResponse?.credential || "";
312
+ if (!idToken) {
313
+ window.clearTimeout(timeoutId);
314
+ finish(false);
315
+ return;
316
+ }
317
+
318
+ try {
319
+ const data = await verifyGoogleToken(idToken);
320
+ const nextAuth = {
321
+ idToken,
322
+ user: data.user || auth?.user || null,
323
+ expiresAt: data.expires_at || null
324
+ };
325
+ setAuth(nextAuth);
326
+ setAuthToken(idToken);
327
+ window.localStorage.setItem(AUTH_STORAGE_KEY, JSON.stringify(nextAuth));
328
+ window.clearTimeout(timeoutId);
329
+ finish(true);
330
+ } catch {
331
+ window.clearTimeout(timeoutId);
332
+ finish(false);
333
+ }
334
+ }
335
+ });
336
+
337
+ window.google.accounts.id.prompt((notification) => {
338
+ if (notification.isNotDisplayed() || notification.isSkippedMoment()) {
339
+ window.clearTimeout(timeoutId);
340
+ finish(false);
341
+ }
342
+ });
343
+ } catch {
344
+ window.clearTimeout(timeoutId);
345
+ finish(false);
346
+ }
347
+ });
348
+
349
+ refreshPromiseRef.current = refreshPromise;
350
+ try {
351
+ return await refreshPromise;
352
+ } finally {
353
+ refreshPromiseRef.current = null;
354
+ }
355
+ }
356
+
357
  useEffect(() => {
358
  const raw = window.localStorage.getItem(AUTH_STORAGE_KEY);
359
  if (!raw) {
 
375
  }
376
  }, []);
377
 
378
+ useEffect(() => {
379
+ setUnauthorizedHandler(async () => {
380
+ const refreshed = await refreshGoogleSessionSilently();
381
+ if (!refreshed) {
382
+ setError("Sessione Google scaduta. Tocca Accedi per rinnovarla.");
383
+ }
384
+ return refreshed;
385
+ });
386
+
387
+ return () => setUnauthorizedHandler(null);
388
+ }, [auth?.idToken]);
389
+
390
+ useEffect(() => {
391
+ const exp = Number(auth?.expiresAt || 0);
392
+ if (!exp) {
393
+ return undefined;
394
+ }
395
+
396
+ const refreshBeforeMs = 2 * 60 * 1000;
397
+ const delayMs = (exp * 1000) - Date.now() - refreshBeforeMs;
398
+ if (delayMs <= 0) {
399
+ return undefined;
400
+ }
401
+
402
+ const timerId = window.setTimeout(() => {
403
+ refreshGoogleSessionSilently();
404
+ }, delayMs);
405
+
406
+ return () => window.clearTimeout(timerId);
407
+ }, [auth?.expiresAt, auth?.idToken]);
408
+
409
  useEffect(() => {
410
  setExpandedProfileKey("");
411
  }, [plantProfile]);
 
675
  try {
676
  const data = await saveMyPlant(selectedSpecies, trimmed);
677
  const saved = data.saved || null;
678
+ let photoUploaded = false;
679
+ if (saved?.id && file) {
680
+ try {
681
+ await uploadMyPlantPhoto(saved.id, file);
682
+ photoUploaded = true;
683
+ } catch {
684
+ photoUploaded = false;
685
+ }
686
+ }
687
+
688
+ if (saved) {
689
+ setSaveStatus(
690
+ photoUploaded
691
+ ? `Salvata: ${saved.user_given_name} (foto associata)`
692
+ : `Salvata: ${saved.user_given_name}`
693
+ );
694
+ } else {
695
+ setSaveStatus("Pianta salvata.");
696
+ }
697
  setUserPlantName("");
698
  await loadMyPlants();
699
  } catch (err) {
pwa-app/src/api.js CHANGED
@@ -10,6 +10,7 @@ function getApiBase() {
10
 
11
  const API_BASE = getApiBase();
12
  let authToken = "";
 
13
 
14
  function buildUrl(path) {
15
  return `${API_BASE}${path}`;
@@ -19,16 +20,37 @@ export function setAuthToken(token) {
19
  authToken = token || "";
20
  }
21
 
 
 
 
 
22
  async function apiFetch(path, options = {}) {
23
- const headers = new Headers(options.headers || {});
24
- if (authToken) {
25
- headers.set("Authorization", `Bearer ${authToken}`);
 
 
 
 
 
 
 
26
  }
27
 
28
- return fetch(buildUrl(path), {
29
- ...options,
30
- headers
31
- });
 
 
 
 
 
 
 
 
 
 
32
  }
33
 
34
  async function parseResponse(response) {
 
10
 
11
  const API_BASE = getApiBase();
12
  let authToken = "";
13
+ let unauthorizedHandler = null;
14
 
15
  function buildUrl(path) {
16
  return `${API_BASE}${path}`;
 
20
  authToken = token || "";
21
  }
22
 
23
/**
 * Register the callback invoked when a request comes back with HTTP 401.
 *
 * @param {Function|null} handler - Callback to store; anything that is not
 *   a function clears the current handler.
 */
export function setUnauthorizedHandler(handler) {
  if (typeof handler !== "function") {
    unauthorizedHandler = null;
    return;
  }
  unauthorizedHandler = handler;
}
26
+
27
/**
 * Perform an API request, attaching the bearer token when one is set.
 *
 * On an HTTP 401 the registered unauthorized handler (if any) is awaited
 * once; when it resolves truthy the request is repeated a single time with
 * freshly built headers. Pass `__skipAuthRefresh: true` in `options` to
 * disable that retry for a given call.
 *
 * @param {string} path - Path appended to the API base URL.
 * @param {object} [options] - `fetch` init options plus the optional
 *   private `__skipAuthRefresh` flag.
 * @returns {Promise<Response>} The final response (the retried one when a
 *   refresh succeeded, otherwise the original).
 */
async function apiFetch(path, options = {}) {
  // Strip the private flag so it is never forwarded to fetch() as an init key.
  const { __skipAuthRefresh: skipAuthRefresh = false, ...fetchOptions } = options;

  // Headers are rebuilt on every attempt so a retry picks up the new token.
  // NOTE(review): a one-shot stream body cannot be replayed on retry —
  // confirm callers only send strings/FormData/Blobs.
  const doFetch = () => {
    const headers = new Headers(fetchOptions.headers || {});
    if (authToken) {
      headers.set("Authorization", `Bearer ${authToken}`);
    }
    return fetch(buildUrl(path), { ...fetchOptions, headers });
  };

  const response = await doFetch();

  const canRefresh = response.status === 401 && !skipAuthRefresh && unauthorizedHandler;
  if (!canRefresh) {
    return response;
  }

  let refreshed = false;
  try {
    refreshed = await unauthorizedHandler();
  } catch {
    // A failing refresh must not mask the original 401 from the caller.
    refreshed = false;
  }

  return refreshed ? doFetch() : response;
}
55
 
56
  async function parseResponse(response) {
sync_leafsnap_aliases.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Populate leafsnap_aliases table in plants.db and update missing_species.csv.
4
+
5
+ For each LeafSnap species not in the DB:
6
+ - If a DB species with the same genus has an epithet edit-distance <= --threshold,
7
+ the closest such DB species is saved as an alias.
8
+ - Otherwise it is appended to missing_species.csv for manual review.
9
+
10
+ Run:
11
+ python sync_leafsnap_aliases.py [--db data/plants.db] [--cache data/leafsnap_cache.pt]
12
+ [--missing missing_species.csv] [--threshold 2] [--dry-run]
13
+ """
14
+
15
+ import argparse
16
+ import csv
17
+ import os
18
+ import sqlite3
19
+ from collections import defaultdict
20
+ from pathlib import Path
21
+
22
+
23
+ def _edit_distance(a: str, b: str) -> int:
24
+ """Simple Levenshtein distance."""
25
+ a, b = a.lower(), b.lower()
26
+ if a == b:
27
+ return 0
28
+ m, n = len(a), len(b)
29
+ dp = list(range(n + 1))
30
+ for i in range(1, m + 1):
31
+ prev = dp[:]
32
+ dp[0] = i
33
+ for j in range(1, n + 1):
34
+ cost = 0 if a[i - 1] == b[j - 1] else 1
35
+ dp[j] = min(dp[j] + 1, dp[j - 1] + 1, prev[j - 1] + cost)
36
+ return dp[n]
37
+
38
+
39
+ def find_best_alias(
40
+ leafsnap_label: str,
41
+ db_species: set[str],
42
+ db_by_genus: dict[str, list[str]],
43
+ threshold: int,
44
+ ) -> str | None:
45
+ """Return the best-matching DB species within *threshold* edit distance, or None."""
46
+ genus = leafsnap_label.split()[0]
47
+ epithet = " ".join(leafsnap_label.split()[1:])
48
+
49
+ candidates = db_by_genus.get(genus, [])
50
+ if not candidates:
51
+ return None
52
+
53
+ best_label: str | None = None
54
+ best_dist = threshold + 1
55
+
56
+ for db_sp in candidates:
57
+ db_epithet = " ".join(db_sp.split()[1:])
58
+ # Compare epithet only (genus already matches)
59
+ d = _edit_distance(epithet, db_epithet)
60
+ if d < best_dist:
61
+ best_dist = d
62
+ best_label = db_sp
63
+
64
+ return best_label if best_dist <= threshold else None
65
+
66
+
67
def main() -> None:
    """Sync LeafSnap labels with plants.db and record the unresolved ones.

    Steps:
      1. Load the LeafSnap label set from the cache file.
      2. Ensure the ``leafsnap_aliases`` table exists and read the DB species
         and the aliases already stored.
      3. Classify every LeafSnap label missing from the DB as either an alias
         (hard-coded override or closest epithet within ``--threshold``) or
         unresolved.
      4. Unless ``--dry-run`` is given, store new aliases in the DB and append
         new unresolved labels to the ``--missing`` CSV.

    NOTE: the ``leafsnap_aliases`` table is created even in dry-run mode,
    because the existing aliases are read from it.
    """
    parser = argparse.ArgumentParser(description="Sync LeafSnap aliases into plants.db")
    parser.add_argument("--db", default=os.getenv("PLANTS_SQLITE_PATH", "data/plants.db"))
    parser.add_argument("--cache", default=os.getenv("LEAFSNAP_CACHE_PATH", "data/leafsnap_cache.pt"))
    parser.add_argument("--missing", default="missing_species.csv")
    parser.add_argument("--threshold", type=int, default=2,
                        help="Max Levenshtein distance on epithet to consider an alias (default: 2)")
    parser.add_argument("--dry-run", action="store_true",
                        help="Print results without writing to DB or CSV")
    args = parser.parse_args()

    # Imported lazily so --help works without loading torch.
    import torch

    print(f"Loading LeafSnap cache from {args.cache} ...")
    # SECURITY: weights_only=False unpickles arbitrary objects and can run
    # code embedded in the file; only point --cache at trusted caches.
    ls_data = torch.load(args.cache, map_location="cpu", weights_only=False)
    leafsnap_species = set(ls_data["labels"])

    # Hardcoded typo overrides (edit distance > threshold but unambiguously same species)
    TYPO_OVERRIDES: dict[str, str] = {
        "Aesculus hippocastamon": "Aesculus hippocastanum",
    }

    to_alias: list[tuple[str, str]] = []  # (leafsnap_label, db_species_name)
    to_missing: list[str] = []

    print(f"Loading DB species from {args.db} ...")
    conn = sqlite3.connect(args.db)
    try:
        # Ensure table exists
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS leafsnap_aliases (
                leafsnap_label TEXT PRIMARY KEY,
                db_species_name TEXT NOT NULL
            )
            """
        )
        conn.commit()

        db_species = {row[0] for row in conn.execute("SELECT species_name FROM plants")}
        existing_aliases = dict(
            conn.execute("SELECT leafsnap_label, db_species_name FROM leafsnap_aliases")
        )

        # Group DB species by genus (first word). Sorting keeps the candidate
        # order stable between runs; blank names have no genus and are skipped.
        db_by_genus: dict[str, list[str]] = defaultdict(list)
        for s in sorted(db_species):
            words = s.split()
            if words:
                db_by_genus[words[0]].append(s)

        # Classify each LeafSnap species
        for sp in sorted(leafsnap_species):
            if sp in db_species:
                continue  # already in DB, no alias needed
            if sp in existing_aliases:
                print(f"  [skip] {sp} -> {existing_aliases[sp]} (already in aliases)")
                continue

            # An override only applies when its target exists in the DB;
            # otherwise fall through to the fuzzy match.
            override = TYPO_OVERRIDES.get(sp)
            if override is not None and override in db_species:
                to_alias.append((sp, override))
                continue

            match = find_best_alias(sp, db_species, db_by_genus, args.threshold)
            if match:
                to_alias.append((sp, match))
            else:
                to_missing.append(sp)

        print(f"\nNew aliases found: {len(to_alias)}")
        for ls_lbl, db_lbl in to_alias:
            print(f"  {ls_lbl} -> {db_lbl}")

        print(f"\nUnresolvable (-> missing_species.csv): {len(to_missing)}")
        for sp in to_missing:
            print(f"  {sp}")

        if args.dry_run:
            print("\n[dry-run] No changes written.")
            return

        # Write aliases to DB
        if to_alias:
            conn.executemany(
                "INSERT OR REPLACE INTO leafsnap_aliases (leafsnap_label, db_species_name) VALUES (?, ?)",
                to_alias,
            )
            conn.commit()
            print(f"\nSaved {len(to_alias)} aliases to leafsnap_aliases table.")
    finally:
        # Release the connection even when a query or the classification fails.
        conn.close()

    # Append to missing_species.csv (avoid duplicates)
    missing_path = Path(args.missing)
    existing_missing: set[str] = set()
    if missing_path.exists():
        with open(missing_path, newline="", encoding="utf-8") as f:
            for row in csv.DictReader(f):
                # Tolerate files without the expected column instead of crashing.
                name = row.get("species_name")
                if name:
                    existing_missing.add(name)

    new_missing = [sp for sp in to_missing if sp not in existing_missing]
    if new_missing:
        write_header = not missing_path.exists() or missing_path.stat().st_size == 0
        with open(missing_path, "a", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["species_name"])
            if write_header:
                writer.writeheader()
            writer.writerows({"species_name": sp} for sp in new_missing)
        print(f"Appended {len(new_missing)} species to {missing_path}.")
    else:
        print("No new missing species to append.")
180
+ if __name__ == "__main__":
181
+ main()