Spaces:
Running
Running
feat: Add map and chart visualizations for conservation data
Browse files
- Add GlobILClient.fetch_features() to get actual GeoJSON features from Feature Services
- Add GlobILClient.search_and_fetch_features() for topic-based feature search
- Add GlobILClient.get_dataset_with_features() for specific dataset features
- Add ExternalDataHandler.fetch_conservation_features() wrapper
- Add create_conservation_map() for interactive Folium maps with polygon/point layers
- Add create_conservation_chart() for bar/pie charts of feature distribution
- Update process_query() conservation routing to generate maps and charts
- Improve city name matching for POI queries (Berlin, Germany now works)
- Add logger to app.py for consistent logging
- app.py +334 -27
- data_utils.py +49 -1
- external_apis.py +155 -0
app.py
CHANGED
|
@@ -15,6 +15,11 @@ import numpy as np
|
|
| 15 |
from pathlib import Path
|
| 16 |
import warnings
|
| 17 |
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# Suppress GeoPandas CRS warnings (area/centroid calculations are approximate for demo purposes)
|
| 20 |
warnings.filterwarnings('ignore', message='.*Geometry is in a geographic CRS.*')
|
|
@@ -23,6 +28,9 @@ warnings.filterwarnings('ignore', message='.*Geometry is in a geographic CRS.*')
|
|
| 23 |
warnings.filterwarnings('ignore', message='.*Invalid file descriptor.*')
|
| 24 |
logging.getLogger('asyncio').setLevel(logging.CRITICAL)
|
| 25 |
|
|
|
|
|
|
|
|
|
|
| 26 |
import branca.colormap as cm
|
| 27 |
import zipfile
|
| 28 |
import urllib.request
|
|
@@ -241,15 +249,21 @@ def fetch_poi_data(parsed_query):
|
|
| 241 |
city_coords = None
|
| 242 |
city_name = None
|
| 243 |
for loc in locations:
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
| 246 |
if loc_lower in CITY_COORDINATES:
|
| 247 |
city_coords = CITY_COORDINATES[loc_lower]
|
| 248 |
city_name = loc
|
| 249 |
break
|
| 250 |
-
|
|
|
|
| 251 |
for city, coords in CITY_COORDINATES.items():
|
| 252 |
-
|
|
|
|
|
|
|
| 253 |
city_coords = coords
|
| 254 |
city_name = loc
|
| 255 |
break
|
|
@@ -393,6 +407,250 @@ def create_conservation_table(datasets):
|
|
| 393 |
return pd.DataFrame(rows)
|
| 394 |
|
| 395 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
def fetch_geospatial_data(parsed_query):
|
| 397 |
"""
|
| 398 |
Fetch and process geospatial data based on parsed query
|
|
@@ -900,33 +1158,82 @@ def process_query(user_query, output_format, chart_type, map_style, color_scheme
|
|
| 900 |
# EXTERNAL API ROUTING: Conservation Data (WWF GLOBIL)
|
| 901 |
# =================================================================
|
| 902 |
if data_source == "conservation":
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
if not datasets:
|
| 906 |
-
error_msg = f"❌ **Conservation Data Query Failed**\n\n{status_msg}\n\n"
|
| 907 |
-
error_msg += "**Available Topics:**\n"
|
| 908 |
-
error_msg += "- forests, wildlife, oceans, freshwater, climate\n\n"
|
| 909 |
-
error_msg += "**Try queries like:**\n"
|
| 910 |
-
error_msg += "- 'Search for deforestation datasets'\n"
|
| 911 |
-
error_msg += "- 'Find wildlife conservation data'\n"
|
| 912 |
-
error_msg += "- 'Show ocean protection datasets'"
|
| 913 |
-
return None, None, None, error_msg, None, None
|
| 914 |
|
| 915 |
-
|
| 916 |
-
|
|
|
|
|
|
|
| 917 |
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
| 928 |
|
| 929 |
-
return
|
| 930 |
|
| 931 |
# =================================================================
|
| 932 |
# DEFAULT: Country/Region Data (Natural Earth)
|
|
|
|
| 15 |
from pathlib import Path
|
| 16 |
import warnings
|
| 17 |
import logging
|
| 18 |
+
import sys
|
| 19 |
+
|
| 20 |
+
# Suppress Python 3.13 asyncio cleanup warnings (harmless garbage collection issue)
|
| 21 |
+
if sys.version_info >= (3, 13):
|
| 22 |
+
warnings.filterwarnings('ignore', message='.*Invalid file descriptor.*')
|
| 23 |
|
| 24 |
# Suppress GeoPandas CRS warnings (area/centroid calculations are approximate for demo purposes)
|
| 25 |
warnings.filterwarnings('ignore', message='.*Geometry is in a geographic CRS.*')
|
|
|
|
| 28 |
warnings.filterwarnings('ignore', message='.*Invalid file descriptor.*')
|
| 29 |
logging.getLogger('asyncio').setLevel(logging.CRITICAL)
|
| 30 |
|
| 31 |
+
# Configure logger for this module
|
| 32 |
+
logger = logging.getLogger(__name__)
|
| 33 |
+
|
| 34 |
import branca.colormap as cm
|
| 35 |
import zipfile
|
| 36 |
import urllib.request
|
|
|
|
| 249 |
city_coords = None
|
| 250 |
city_name = None
|
| 251 |
for loc in locations:
|
| 252 |
+
# Normalize: lowercase, remove commas, extra spaces
|
| 253 |
+
loc_lower = loc.lower().replace(",", " ").strip()
|
| 254 |
+
loc_words = set(loc_lower.split())
|
| 255 |
+
|
| 256 |
+
# Check direct match first
|
| 257 |
if loc_lower in CITY_COORDINATES:
|
| 258 |
city_coords = CITY_COORDINATES[loc_lower]
|
| 259 |
city_name = loc
|
| 260 |
break
|
| 261 |
+
|
| 262 |
+
# Check if any city name appears in the location string
|
| 263 |
for city, coords in CITY_COORDINATES.items():
|
| 264 |
+
city_words = set(city.split())
|
| 265 |
+
# Match if city name is contained in location OR location word matches city
|
| 266 |
+
if city in loc_lower or city_words & loc_words:
|
| 267 |
city_coords = coords
|
| 268 |
city_name = loc
|
| 269 |
break
|
|
|
|
| 407 |
return pd.DataFrame(rows)
|
| 408 |
|
| 409 |
|
| 410 |
+
def _conservation_geometry_anchors(geometry):
    """Return the (lat, lon) points one geometry contributes to the map centre."""
    # GeoJSON permits "geometry": null, so treat None like an empty geometry.
    geometry = geometry or {}
    geom_type = geometry.get('type', '')
    coords = geometry.get('coordinates') or []

    if geom_type == 'Point':
        # GeoJSON stores positions as [lon, lat]; the map wants (lat, lon).
        return [(coords[1], coords[0])] if len(coords) >= 2 else []

    if geom_type == 'Polygon':
        polygons = [coords]
    elif geom_type == 'MultiPolygon':
        polygons = coords
    else:
        # Lines and other geometry types are drawn but do not affect centring.
        return []

    anchors = []
    for polygon in polygons:
        # Only the first ring of each polygon is averaged (vertex mean).
        ring = polygon[0] if polygon and isinstance(polygon[0], (list, tuple)) else []
        vertices = [c for c in ring if len(c) >= 2]
        if vertices:
            anchors.append((
                sum(v[1] for v in vertices) / len(vertices),
                sum(v[0] for v in vertices) / len(vertices),
            ))
    return anchors


def _conservation_popup_html(dataset_title, properties, max_properties=5):
    """Build popup HTML: bold dataset title plus the first few non-empty properties."""
    import html

    # Titles and property values come from a remote service; escape them so
    # they are shown as text instead of being interpreted as markup.
    lines = [f"<b>{html.escape(str(dataset_title))}</b>"]
    for key, value in list((properties or {}).items())[:max_properties]:
        if value and str(value).strip():
            lines.append(f"{html.escape(str(key))}: {html.escape(str(value))}")
    return "<br>".join(lines)


def create_conservation_map(feature_data_list, map_style='Light'):
    """
    Create a Folium map showing conservation features from GLOBIL.

    The map is centred on the mean of the Point positions and of the
    first-ring vertex averages of Polygon/MultiPolygon features; when no such
    coordinates exist it falls back to (20, 0). Each dataset is drawn in its
    own FeatureGroup, limited to its first 100 features.

    Args:
        feature_data_list: List of dicts with 'dataset' and 'features' keys
        map_style: Key looked up in MAP_STYLES; unknown keys fall back to
            'CartoDB positron'

    Returns:
        Folium map object, or None when feature_data_list is empty
    """
    if not feature_data_list:
        return None

    # Collect anchor points from every feature to calculate the centre.
    all_coords = []
    for data in feature_data_list:
        for feature in data.get('features') or []:
            all_coords.extend(_conservation_geometry_anchors(feature.get('geometry')))

    if all_coords:
        center_lat = sum(lat for lat, _ in all_coords) / len(all_coords)
        center_lon = sum(lon for _, lon in all_coords) / len(all_coords)
    else:
        # Default to world center if no coordinates found
        center_lat, center_lon = 20, 0

    tiles = MAP_STYLES.get(map_style, 'CartoDB positron')
    m = folium.Map(location=[center_lat, center_lon], zoom_start=3, tiles=tiles)

    # Color palette for different datasets (cycled when there are more datasets)
    colors = ['#2ecc71', '#3498db', '#9b59b6', '#e74c3c', '#f39c12', '#1abc9c']

    for i, data in enumerate(feature_data_list):
        dataset = data.get('dataset') or {}
        features = data.get('features') or []
        # A title that is present but None would otherwise become the layer name.
        dataset_title = dataset.get('title') or f'Dataset {i+1}'
        color = colors[i % len(colors)]

        # Create a feature group for this dataset
        fg = folium.FeatureGroup(name=dataset_title)

        for feature in features[:100]:  # Limit features per dataset
            geom = feature.get('geometry') or {}
            geom_type = geom.get('type', '')
            popup_html = _conservation_popup_html(dataset_title, feature.get('properties'))

            # Best effort: one malformed feature must not abort the whole map.
            try:
                if geom_type == 'Point':
                    coords = geom.get('coordinates') or []
                    if len(coords) >= 2:
                        folium.CircleMarker(
                            location=[coords[1], coords[0]],
                            radius=6,
                            color=color,
                            fill=True,
                            fillColor=color,
                            fillOpacity=0.7,
                            popup=folium.Popup(popup_html, max_width=300)
                        ).add_to(fg)

                elif geom_type in ('Polygon', 'MultiPolygon'):
                    folium.GeoJson(
                        feature,
                        # c=color binds the current colour; a plain closure
                        # would see the last loop value.
                        style_function=lambda x, c=color: {
                            'fillColor': c,
                            'color': c,
                            'weight': 2,
                            'fillOpacity': 0.4
                        },
                        popup=folium.Popup(popup_html, max_width=300)
                    ).add_to(fg)

                elif geom_type in ('LineString', 'MultiLineString'):
                    folium.GeoJson(
                        feature,
                        style_function=lambda x, c=color: {
                            'color': c,
                            'weight': 3
                        },
                        popup=folium.Popup(popup_html, max_width=300)
                    ).add_to(fg)
            except Exception as e:
                logger.debug("Error adding feature: %s", e)
                continue

        fg.add_to(m)

    # Add layer control if multiple datasets
    if len(feature_data_list) > 1:
        folium.LayerControl().add_to(m)

    return m
|
| 532 |
+
|
| 533 |
+
|
| 534 |
+
def create_conservation_chart(feature_data_list, chart_type='bar'):
    """
    Create a Plotly chart of feature counts per conservation dataset.

    Args:
        feature_data_list: List of dicts with 'dataset' and 'features' keys
        chart_type: 'pie' for a pie chart; any other value produces a bar chart

    Returns:
        Plotly figure, or None when there is nothing to plot
    """
    if not feature_data_list:
        return None

    # Aggregate data by dataset
    chart_data = []
    for data in feature_data_list:
        dataset = data.get('dataset') or {}
        features = data.get('features') or []
        chart_data.append({
            # The title may be present but None; fall back before truncating
            # to 40 characters so slicing never sees a non-string.
            'Dataset': str(dataset.get('title') or 'Unknown')[:40],
            'Feature Count': len(features),
            'Views': dataset.get('views', 0)
        })

    # Covers truthy-but-empty iterables (e.g. a generator that yields nothing).
    if not chart_data:
        return None

    df = pd.DataFrame(chart_data)

    if chart_type == 'pie':
        fig = px.pie(
            df,
            values='Feature Count',
            names='Dataset',
            title='Conservation Features by Dataset',
            color_discrete_sequence=px.colors.qualitative.Set2
        )
    else:  # Default to bar
        fig = px.bar(
            df,
            x='Dataset',
            y='Feature Count',
            color='Dataset',
            title='Conservation Features by Dataset',
            color_discrete_sequence=px.colors.qualitative.Set2
        )
        fig.update_layout(xaxis_tickangle=-45)

    fig.update_layout(
        template='plotly_white',
        showlegend=True,
        height=400
    )

    return fig
|
| 590 |
+
|
| 591 |
+
|
| 592 |
+
def fetch_conservation_features_for_query(parsed_query):
    """
    Fetch conservation features based on parsed query.

    The topic starts from parsed_query["conservation_topic"] (default
    "forests") and is replaced by the first keyword in the table below that
    occurs in the query's locations/indicators text.

    Args:
        parsed_query: Dict read for the keys "conservation_topic",
            "locations" and "indicators"

    Returns:
        Tuple of (feature_data_list, status_message, metadata_list). When no
        features are available the first element is empty and the third holds
        the result of the metadata-only search.
    """
    if not EXTERNAL_APIS_AVAILABLE or external_data is None:
        return [], "External APIs not available", []

    topic = parsed_query.get("conservation_topic") or "forests"

    # Topic detection from query (first matching keyword wins, in this order)
    query_keywords = {
        "deforestation": "forests",
        "forest": "forests",
        "wildlife": "wildlife",
        "endangered": "wildlife",
        "species": "wildlife",
        "marine": "oceans",
        "ocean": "oceans",
        "coral": "oceans",
        "protected": "wildlife",
        "river": "freshwater",
        "water": "freshwater",
        "climate": "climate",
        "carbon": "climate"
    }

    # This runs outside the try block, so tolerate keys that are present but
    # None and entries that are not strings instead of raising TypeError.
    terms = list(parsed_query.get("locations") or []) + list(parsed_query.get("indicators") or [])
    all_text = " ".join(str(term) for term in terms).lower()
    for keyword, topic_name in query_keywords.items():
        if keyword in all_text:
            topic = topic_name
            break

    try:
        # Fetch features from GLOBIL
        feature_data = external_data.fetch_conservation_features(
            topic,
            max_datasets=3,
            max_features=200
        )

        if not feature_data:
            # Fallback to metadata only
            datasets = external_data.search_conservation_data(topic, limit=10)
            return [], f"No feature data available for {topic}. Showing metadata only.", datasets

        total_features = sum(d.get('feature_count', 0) for d in feature_data)
        datasets_found = len(feature_data)

        return (
            feature_data,
            f"Found {total_features} features from {datasets_found} datasets about {topic}",
            [d.get('dataset', {}) for d in feature_data]
        )

    except Exception as e:
        # Top-level boundary for the external API: report the failure to the
        # caller as a status message, but keep the traceback in the log.
        logger.exception("Error fetching conservation features: %s", e)
        return [], f"Error fetching data: {str(e)}", []
|
| 652 |
+
|
| 653 |
+
|
| 654 |
def fetch_geospatial_data(parsed_query):
|
| 655 |
"""
|
| 656 |
Fetch and process geospatial data based on parsed query
|
|
|
|
| 1158 |
# EXTERNAL API ROUTING: Conservation Data (WWF GLOBIL)
|
| 1159 |
# =================================================================
|
| 1160 |
if data_source == "conservation":
|
| 1161 |
+
# Try to fetch actual feature data first
|
| 1162 |
+
feature_data, status_msg, metadata = fetch_conservation_features_for_query(parsed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1163 |
|
| 1164 |
+
map_html = None
|
| 1165 |
+
chart_fig = None
|
| 1166 |
+
map_file = None
|
| 1167 |
+
csv_file = None
|
| 1168 |
|
| 1169 |
+
if feature_data:
|
| 1170 |
+
# We have actual feature data - create map and chart
|
| 1171 |
+
cons_map = create_conservation_map(feature_data, map_style)
|
| 1172 |
+
if cons_map:
|
| 1173 |
+
map_html = cons_map._repr_html_()
|
| 1174 |
+
map_file = tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8')
|
| 1175 |
+
cons_map.save(map_file.name)
|
| 1176 |
+
map_file = map_file.name
|
| 1177 |
+
|
| 1178 |
+
# Create chart showing feature counts
|
| 1179 |
+
chart_fig = create_conservation_chart(feature_data, chart_type if chart_type in ['bar', 'pie'] else 'bar')
|
| 1180 |
+
|
| 1181 |
+
# Create table from feature properties
|
| 1182 |
+
table_rows = []
|
| 1183 |
+
for data in feature_data:
|
| 1184 |
+
dataset_title = data.get('dataset', {}).get('title', 'Unknown')
|
| 1185 |
+
for feat in data.get('features', [])[:50]: # Limit rows
|
| 1186 |
+
props = feat.get('properties', {})
|
| 1187 |
+
row = {'Dataset': dataset_title}
|
| 1188 |
+
# Get first few meaningful properties
|
| 1189 |
+
for key, val in list(props.items())[:4]:
|
| 1190 |
+
if val and str(val).strip() and key.lower() not in ['objectid', 'fid', 'shape']:
|
| 1191 |
+
row[key] = str(val)[:50]
|
| 1192 |
+
if len(row) > 1:
|
| 1193 |
+
table_rows.append(row)
|
| 1194 |
+
|
| 1195 |
+
if table_rows:
|
| 1196 |
+
cons_df = pd.DataFrame(table_rows)
|
| 1197 |
+
else:
|
| 1198 |
+
# Fallback to metadata table
|
| 1199 |
+
cons_df = create_conservation_table(metadata)
|
| 1200 |
+
|
| 1201 |
+
summary = f"🔍 **Query:** {user_query}\n\n"
|
| 1202 |
+
summary += f"📍 **Data Source:** WWF GLOBIL (ArcGIS Hub)\n"
|
| 1203 |
+
summary += f"🌿 **Topic:** {parsed.get('conservation_topic', 'General')}\n"
|
| 1204 |
+
summary += f"📊 **Results:** {status_msg}\n\n"
|
| 1205 |
+
summary += "🗺️ *Map shows conservation areas with colored layers*\n"
|
| 1206 |
+
summary += "📊 *Chart shows feature distribution across datasets*"
|
| 1207 |
+
else:
|
| 1208 |
+
# Fallback to metadata only (no feature data available)
|
| 1209 |
+
datasets, fallback_msg = fetch_conservation_data(parsed)
|
| 1210 |
+
|
| 1211 |
+
if not datasets:
|
| 1212 |
+
error_msg = f"❌ **Conservation Data Query Failed**\n\n{fallback_msg}\n\n"
|
| 1213 |
+
error_msg += "**Available Topics:**\n"
|
| 1214 |
+
error_msg += "- forests, wildlife, oceans, freshwater, climate\n\n"
|
| 1215 |
+
error_msg += "**Try queries like:**\n"
|
| 1216 |
+
error_msg += "- 'Search for deforestation datasets'\n"
|
| 1217 |
+
error_msg += "- 'Find wildlife conservation data'\n"
|
| 1218 |
+
error_msg += "- 'Show ocean protection datasets'"
|
| 1219 |
+
return None, None, None, error_msg, None, None
|
| 1220 |
+
|
| 1221 |
+
cons_df = create_conservation_table(datasets)
|
| 1222 |
+
|
| 1223 |
+
summary = f"🔍 **Query:** {user_query}\n\n"
|
| 1224 |
+
summary += f"📍 **Data Source:** WWF GLOBIL (ArcGIS Hub)\n"
|
| 1225 |
+
summary += f"🌿 **Topic:** {parsed.get('conservation_topic', 'General')}\n"
|
| 1226 |
+
summary += f"📊 **Results:** {status_msg or fallback_msg}\n\n"
|
| 1227 |
+
summary += "ℹ️ *Feature geometries not available for these datasets*\n"
|
| 1228 |
+
summary += "💡 *Showing dataset metadata table*"
|
| 1229 |
|
| 1230 |
+
# Save table to CSV
|
| 1231 |
+
if cons_df is not None and not cons_df.empty:
|
| 1232 |
+
csv_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w', encoding='utf-8')
|
| 1233 |
+
cons_df.to_csv(csv_file.name, index=False)
|
| 1234 |
+
csv_file = csv_file.name
|
| 1235 |
|
| 1236 |
+
return map_html, chart_fig, cons_df, summary, map_file, csv_file
|
| 1237 |
|
| 1238 |
# =================================================================
|
| 1239 |
# DEFAULT: Country/Region Data (Natural Earth)
|
data_utils.py
CHANGED
|
@@ -445,7 +445,55 @@ class ExternalDataHandler:
|
|
| 445 |
return []
|
| 446 |
|
| 447 |
return self._globil.get_public_feature_layers(category)
|
| 448 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
# =========================================================================
|
| 450 |
# Utility Methods
|
| 451 |
# =========================================================================
|
|
|
|
| 445 |
return []
|
| 446 |
|
| 447 |
return self._globil.get_public_feature_layers(category)
|
| 448 |
+
|
| 449 |
+
def fetch_conservation_features(
    self,
    topic: str,
    max_datasets: int = 3,
    max_features: int = 500
) -> List[Dict]:
    """
    Search conservation datasets for a topic and return their feature data.

    Delegates to the GLOBIL client's search_and_fetch_features().

    Args:
        topic: Conservation topic to search
        max_datasets: Maximum datasets to query
        max_features: Maximum features per dataset

    Returns:
        List of dictionaries with dataset info and GeoJSON features; an empty
        list when no GLOBIL client is available
    """
    self._ensure_initialized()
    globil = self._globil
    if globil:
        return globil.search_and_fetch_features(
            topic,
            max_datasets=max_datasets,
            max_features_per_dataset=max_features,
        )
    return []
|
| 475 |
+
|
| 476 |
+
def get_conservation_dataset_features(
    self,
    item_id: str,
    max_features: int = 1000
) -> Optional[Dict]:
    """
    Return one conservation dataset together with its features.

    Delegates to the GLOBIL client's get_dataset_with_features().

    Args:
        item_id: ArcGIS item ID
        max_features: Maximum features to return

    Returns:
        Dictionary with dataset info and GeoJSON features, or None when no
        GLOBIL client is available
    """
    self._ensure_initialized()
    globil = self._globil
    if globil:
        return globil.get_dataset_with_features(item_id, max_features)
    return None
|
| 496 |
+
|
| 497 |
# =========================================================================
|
| 498 |
# Utility Methods
|
| 499 |
# =========================================================================
|
external_apis.py
CHANGED
|
@@ -474,6 +474,161 @@ class GlobILClient:
|
|
| 474 |
|
| 475 |
return feature_layers
|
| 476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
|
| 478 |
# Conservation data topics available in GLOBIL
|
| 479 |
GLOBIL_TOPICS = {
|
|
|
|
| 474 |
|
| 475 |
return feature_layers
|
| 476 |
|
| 477 |
+
def get_feature_service_url(self, item_id: str) -> Optional[str]:
    """
    Look up the service URL recorded for an ArcGIS item.

    Args:
        item_id: ArcGIS item ID

    Returns:
        The item's "url" value, or None when the item info is missing or has
        no (non-empty) URL
    """
    info = self.get_dataset_info(item_id)
    if not info:
        return None
    url = info.get("url")
    return url if url else None
|
| 491 |
+
|
| 492 |
+
def fetch_features(
    self,
    service_url: str,
    layer_index: int = 0,
    max_features: int = 1000,
    where: str = "1=1",
    out_fields: str = "*",
    return_geometry: bool = True
) -> Dict:
    """
    Fetch features from an ArcGIS Feature Service.

    Args:
        service_url: URL of the Feature Service. A trailing slash is ignored.
            If the URL already ends in a numeric layer segment
            (e.g. ".../FeatureServer/0") that layer is queried and
            layer_index is not appended.
        layer_index: Layer index (usually 0)
        max_features: Maximum features to return
        where: SQL where clause filter
        out_fields: Fields to return (* for all)
        return_geometry: Whether to include geometries

    Returns:
        GeoJSON-like dictionary with features. On a request failure, an
        undecodable body, or an ArcGIS error payload, an empty
        FeatureCollection is returned instead of raising.
    """
    empty_collection = {"type": "FeatureCollection", "features": []}

    # Construct query URL without doubling slashes or layer indices.
    base_url = service_url.rstrip("/")
    if not base_url.rsplit("/", 1)[-1].isdigit():
        base_url = f"{base_url}/{layer_index}"
    query_url = f"{base_url}/query"

    params = {
        "where": where,
        "outFields": out_fields,
        "returnGeometry": str(return_geometry).lower(),
        "f": "geojson",  # Request GeoJSON format
        "resultRecordCount": max_features
    }

    try:
        response = self.session.get(
            query_url,
            params=params,
            timeout=API_CONFIG.DEFAULT_TIMEOUT * 2  # Longer timeout for feature data
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON; depending on the
        # installed requests version that error is not a RequestException.
        logger.error(f"Feature fetch error: {e}")
        return empty_collection

    if not isinstance(data, dict):
        logger.warning("ArcGIS error: unexpected response payload")
        return empty_collection

    # Handle ArcGIS error responses (an "error" object in the JSON body)
    if "error" in data:
        error = data["error"]
        message = error.get("message", "Unknown error") if isinstance(error, dict) else error
        logger.warning(f"ArcGIS error: {message}")
        return empty_collection

    return data
|
| 545 |
+
|
| 546 |
+
def search_and_fetch_features(
    self,
    topic: str,
    max_datasets: int = 3,
    max_features_per_dataset: int = 500
) -> List[Dict]:
    """
    Find feature layers for a topic and download features from each.

    Only the first max_datasets layers returned for the topic are tried;
    layers without an id, without a service URL, or without any features are
    left out of the result.

    Args:
        topic: Conservation topic to search
        max_datasets: Maximum datasets to query
        max_features_per_dataset: Features per dataset

    Returns:
        List of dictionaries with the keys "dataset", "features",
        "feature_count" and "service_url"
    """
    collected = []

    # Get feature layers for topic
    candidates = self.get_public_feature_layers(topic)[:max_datasets]

    for layer in candidates:
        item_id = layer.get("id")
        # Resolve the service URL only for layers that carry an id.
        service_url = self.get_feature_service_url(item_id) if item_id else None
        if not service_url:
            continue

        geojson = self.fetch_features(
            service_url,
            max_features=max_features_per_dataset,
        )
        features = geojson.get("features", [])
        if not features:
            continue

        collected.append({
            "dataset": layer,
            "features": features,
            "feature_count": len(features),
            "service_url": service_url,
        })

    return collected
|
| 594 |
+
|
| 595 |
+
def get_dataset_with_features(
    self,
    item_id: str,
    max_features: int = 1000
) -> Optional[Dict]:
    """
    Get a specific dataset with its features.

    Args:
        item_id: ArcGIS item ID
        max_features: Maximum features to return

    Returns:
        None when the item info or its service URL is missing. Otherwise a
        dictionary with:
          - "dataset": id, title, snippet, type and url of the item
          - "geojson": the full response from fetch_features()
          - "features": the feature list taken from that response
          - "feature_count": number of features
          - "service_url": the URL that was queried
        "features" and "service_url" use the same key names as the entries
        returned by search_and_fetch_features(), so both result shapes can be
        read the same way.
    """
    info = self.get_dataset_info(item_id)
    if not info:
        return None

    service_url = info.get("url")
    if not service_url:
        return None

    geojson = self.fetch_features(service_url, max_features=max_features)
    features = geojson.get("features") or []

    return {
        "dataset": {
            "id": item_id,
            "title": info.get("title"),
            "snippet": info.get("snippet"),
            "type": info.get("type"),
            "url": service_url
        },
        "geojson": geojson,
        "features": features,
        "feature_count": len(features),
        "service_url": service_url
    }
|
| 631 |
+
|
| 632 |
|
| 633 |
# Conservation data topics available in GLOBIL
|
| 634 |
GLOBIL_TOPICS = {
|