Minerva666 committed on
Commit
6d79e71
·
verified ·
1 Parent(s): 0535053

Upload tools.py

Browse files
Files changed (1) hide show
  1. tools.py +81 -0
tools.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from typing import Dict, Any
4
+ from query import NHMQuery
5
+
6
+ GBIF_OCCURRENCE_SEARCH = "https://api.gbif.org/v1/occurrence/search"
7
+ NHM_INSTITUTION_CODE = "NHMUK"
8
+
9
+
10
def search_nhm_occurrences(
    scientific_name=None,
    country=None,
    year=None,
    limit=20,
    offset=0,
    timeout=30,
) -> Dict[str, Any]:
    """Query the GBIF occurrence search endpoint for NHM records.

    The request is always restricted to ``NHM_INSTITUTION_CODE`` through the
    ``institutionCode`` query parameter. Optional filters are only sent when
    they are truthy, so ``None`` and empty strings are ignored.

    Args:
        scientific_name: Value sent as the ``scientificName`` filter.
        country: Value sent as the ``country`` filter.
        year: Value sent as the ``year`` filter.
        limit: Page size sent as ``limit``.
        offset: Paging offset sent as ``offset``.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; ``None`` disables the timeout.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response status indicates an error.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {
        "limit": limit,
        "offset": offset,
        "institutionCode": NHM_INSTITUTION_CODE,
    }

    # Only forward the filters the caller actually supplied.
    optional_filters = {
        "scientificName": scientific_name,
        "country": country,
        "year": year,
    }
    params.update({key: value for key, value in optional_filters.items() if value})

    # Without an explicit timeout requests waits forever on a stalled socket.
    response = requests.get(GBIF_OCCURRENCE_SEARCH, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()
33
+
34
+
35
def nhm_occurrence_tool(query: NHMQuery) -> Dict[str, Any]:
    """Run an occurrence search for ``query`` and package the result.

    The search fields (``scientific_name``, ``country``, ``year``, ``limit``,
    ``offset``) are read from ``query`` and forwarded to
    ``search_nhm_occurrences``.

    Returns:
        A dict with:
        - ``query_used``: the ``__dict__`` of ``query``
        - ``record_count``: the ``count`` field of the response (``None`` if absent)
        - ``returned_records``: number of rows in the resulting DataFrame
        - ``dataframe``: a DataFrame built from the response's ``results`` list
    """
    field_names = ("scientific_name", "country", "year", "limit", "offset")
    search_kwargs = {name: getattr(query, name) for name in field_names}
    payload = search_nhm_occurrences(**search_kwargs)

    # A response without "results" yields an empty DataFrame.
    records = payload.get("results", [])
    frame = pd.DataFrame(records)

    result: Dict[str, Any] = {}
    result["query_used"] = query.__dict__
    result["record_count"] = payload.get("count")
    result["returned_records"] = len(frame)
    result["dataframe"] = frame
    return result
52
+
53
+
54
def summarize_occurrences(df: pd.DataFrame) -> Dict[str, Any]:
    """Build a small summary of an occurrence DataFrame.

    Args:
        df: DataFrame of occurrence records. The columns ``country``,
            ``year`` and ``recordedBy`` are each used only if present.

    Returns:
        ``{"summary": "No records returned"}`` when ``df`` is empty.
        Otherwise a dict with ``total_records`` and, depending on which
        columns exist:
        - ``top_countries``: the five most frequent ``country`` values
        - ``year_range``: ``{"min": ..., "max": ...}`` as ints, omitted when
          every ``year`` value is missing
        - ``top_collectors``: the five most frequent ``recordedBy`` values
          after stripping surrounding ``[`` / ``]`` and removing ``'``
    """
    if df.empty:
        return {"summary": "No records returned"}

    summary: Dict[str, Any] = {"total_records": len(df)}

    if "country" in df.columns:
        summary["top_countries"] = df["country"].value_counts().head(5).to_dict()

    if "year" in df.columns:
        # Drop missing years first: int(NaN) raises ValueError, which would
        # otherwise crash the summary when no record carries a year.
        years = df["year"].dropna()
        if not years.empty:
            summary["year_range"] = {
                "min": int(years.min()),
                "max": int(years.max()),
            }

    if "recordedBy" in df.columns:
        # Values may look like a stringified list (e.g. "['A. Smith']");
        # strip the brackets and quotes so identical names are counted together.
        collectors = (
            df["recordedBy"]
            .dropna()
            .astype(str)
            .str.strip("[]")
            .str.replace("'", "", regex=False)
        )
        summary["top_collectors"] = collectors.value_counts().head(5).to_dict()

    return summary