rifatSDAS committed on
Commit
a9015e2
·
1 Parent(s): 9e1384c

feat: Add map and chart visualizations for conservation data

Browse files

- Add GlobILClient.fetch_features() to get actual GeoJSON features from Feature Services
- Add GlobILClient.search_and_fetch_features() for topic-based feature search
- Add GlobILClient.get_dataset_with_features() for specific dataset features
- Add ExternalDataHandler.fetch_conservation_features() wrapper
- Add create_conservation_map() for interactive Folium maps with polygon/point layers
- Add create_conservation_chart() for bar/pie charts of feature distribution
- Update process_query() conservation routing to generate maps and charts
- Improve city name matching for POI queries (Berlin, Germany now works)
- Add logger to app.py for consistent logging

Files changed (3) hide show
  1. app.py +334 -27
  2. data_utils.py +49 -1
  3. external_apis.py +155 -0
app.py CHANGED
@@ -15,6 +15,11 @@ import numpy as np
15
  from pathlib import Path
16
  import warnings
17
  import logging
 
 
 
 
 
18
 
19
  # Suppress GeoPandas CRS warnings (area/centroid calculations are approximate for demo purposes)
20
  warnings.filterwarnings('ignore', message='.*Geometry is in a geographic CRS.*')
@@ -23,6 +28,9 @@ warnings.filterwarnings('ignore', message='.*Geometry is in a geographic CRS.*')
23
  warnings.filterwarnings('ignore', message='.*Invalid file descriptor.*')
24
  logging.getLogger('asyncio').setLevel(logging.CRITICAL)
25
 
 
 
 
26
  import branca.colormap as cm
27
  import zipfile
28
  import urllib.request
@@ -241,15 +249,21 @@ def fetch_poi_data(parsed_query):
241
  city_coords = None
242
  city_name = None
243
  for loc in locations:
244
- loc_lower = loc.lower().replace(",", "").strip()
245
- # Check direct match
 
 
 
246
  if loc_lower in CITY_COORDINATES:
247
  city_coords = CITY_COORDINATES[loc_lower]
248
  city_name = loc
249
  break
250
- # Check partial match
 
251
  for city, coords in CITY_COORDINATES.items():
252
- if city in loc_lower or loc_lower in city:
 
 
253
  city_coords = coords
254
  city_name = loc
255
  break
@@ -393,6 +407,250 @@ def create_conservation_table(datasets):
393
  return pd.DataFrame(rows)
394
 
395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
  def fetch_geospatial_data(parsed_query):
397
  """
398
  Fetch and process geospatial data based on parsed query
@@ -900,33 +1158,82 @@ def process_query(user_query, output_format, chart_type, map_style, color_scheme
900
  # EXTERNAL API ROUTING: Conservation Data (WWF GLOBIL)
901
  # =================================================================
902
  if data_source == "conservation":
903
- datasets, status_msg = fetch_conservation_data(parsed)
904
-
905
- if not datasets:
906
- error_msg = f"❌ **Conservation Data Query Failed**\n\n{status_msg}\n\n"
907
- error_msg += "**Available Topics:**\n"
908
- error_msg += "- forests, wildlife, oceans, freshwater, climate\n\n"
909
- error_msg += "**Try queries like:**\n"
910
- error_msg += "- 'Search for deforestation datasets'\n"
911
- error_msg += "- 'Find wildlife conservation data'\n"
912
- error_msg += "- 'Show ocean protection datasets'"
913
- return None, None, None, error_msg, None, None
914
 
915
- # Create conservation datasets table
916
- cons_df = create_conservation_table(datasets)
 
 
917
 
918
- # Save to CSV
919
- csv_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w', encoding='utf-8')
920
- cons_df.to_csv(csv_file.name, index=False)
921
- csv_file = csv_file.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
922
 
923
- summary = f"🔍 **Query:** {user_query}\n\n"
924
- summary += f"📍 **Data Source:** WWF GLOBIL (ArcGIS Hub)\n"
925
- summary += f"🌿 **Topic:** {parsed.get('conservation_topic', 'General')}\n"
926
- summary += f"📊 **Results:** {status_msg}\n\n"
927
- summary += "💡 *Conservation datasets from World Wildlife Fund*"
928
 
929
- return None, None, cons_df, summary, None, csv_file
930
 
931
  # =================================================================
932
  # DEFAULT: Country/Region Data (Natural Earth)
 
15
  from pathlib import Path
16
  import warnings
17
  import logging
18
+ import sys
19
+
20
+ # Suppress Python 3.13 asyncio cleanup warnings (harmless garbage collection issue)
21
+ if sys.version_info >= (3, 13):
22
+ warnings.filterwarnings('ignore', message='.*Invalid file descriptor.*')
23
 
24
  # Suppress GeoPandas CRS warnings (area/centroid calculations are approximate for demo purposes)
25
  warnings.filterwarnings('ignore', message='.*Geometry is in a geographic CRS.*')
 
28
  warnings.filterwarnings('ignore', message='.*Invalid file descriptor.*')
29
  logging.getLogger('asyncio').setLevel(logging.CRITICAL)
30
 
31
+ # Configure logger for this module
32
+ logger = logging.getLogger(__name__)
33
+
34
  import branca.colormap as cm
35
  import zipfile
36
  import urllib.request
 
249
  city_coords = None
250
  city_name = None
251
  for loc in locations:
252
+ # Normalize: lowercase, remove commas, extra spaces
253
+ loc_lower = loc.lower().replace(",", " ").strip()
254
+ loc_words = set(loc_lower.split())
255
+
256
+ # Check direct match first
257
  if loc_lower in CITY_COORDINATES:
258
  city_coords = CITY_COORDINATES[loc_lower]
259
  city_name = loc
260
  break
261
+
262
+ # Check if any city name appears in the location string
263
  for city, coords in CITY_COORDINATES.items():
264
+ city_words = set(city.split())
265
+ # Match if city name is contained in location OR location word matches city
266
+ if city in loc_lower or city_words & loc_words:
267
  city_coords = coords
268
  city_name = loc
269
  break
 
407
  return pd.DataFrame(rows)
408
 
409
 
410
def create_conservation_map(feature_data_list, map_style='Light'):
    """
    Create a Folium map showing conservation features from GLOBIL.

    Args:
        feature_data_list: List of dicts with 'dataset' and 'features' keys,
            where 'features' is a list of GeoJSON feature dicts.
        map_style: Map tile style key looked up in MAP_STYLES.

    Returns:
        Folium map object, or None when feature_data_list is empty.
    """
    if not feature_data_list:
        return None

    def _ring_center(ring):
        """Return the (lat, lon) average of a GeoJSON coordinate ring, or None."""
        lons = [c[0] for c in ring if len(c) >= 2]
        lats = [c[1] for c in ring if len(c) >= 2]
        if lons and lats:
            return (sum(lats) / len(lats), sum(lons) / len(lons))
        return None

    # Collect a representative coordinate per feature so the map can be
    # centered on the data instead of a fixed point.
    all_coords = []
    for data in feature_data_list:
        for feature in data.get('features', []):
            geom = feature.get('geometry', {})
            geom_type = geom.get('type', '')
            coords = geom.get('coordinates', [])

            if geom_type == 'Point' and len(coords) >= 2:
                all_coords.append((coords[1], coords[0]))  # GeoJSON order is (lon, lat)
            elif geom_type == 'Polygon' and coords:
                # Cheap approximation: centroid of the outer ring only
                ring = coords[0] if isinstance(coords[0], list) and coords[0] else []
                center = _ring_center(ring) if ring else None
                if center:
                    all_coords.append(center)
            elif geom_type == 'MultiPolygon' and coords:
                for polygon in coords:
                    if polygon and polygon[0]:
                        center = _ring_center(polygon[0])
                        if center:
                            all_coords.append(center)

    if not all_coords:
        # Default to a world-ish center if no coordinates were found
        center_lat, center_lon = 20, 0
    else:
        center_lat = sum(c[0] for c in all_coords) / len(all_coords)
        center_lon = sum(c[1] for c in all_coords) / len(all_coords)

    tiles = MAP_STYLES.get(map_style, 'CartoDB positron')
    m = folium.Map(location=[center_lat, center_lon], zoom_start=3, tiles=tiles)

    # Color palette for different datasets
    colors = ['#2ecc71', '#3498db', '#9b59b6', '#e74c3c', '#f39c12', '#1abc9c']

    for i, data in enumerate(feature_data_list):
        dataset = data.get('dataset', {})
        features = data.get('features', [])
        dataset_title = dataset.get('title', f'Dataset {i+1}')
        color = colors[i % len(colors)]

        # One toggleable layer per dataset
        fg = folium.FeatureGroup(name=dataset_title)

        for feature in features[:100]:  # Cap features per dataset to keep the map light
            geom = feature.get('geometry', {})
            props = feature.get('properties', {})
            geom_type = geom.get('type', '')

            # Build popup content from the first few non-empty properties
            popup_lines = [f"<b>{dataset_title}</b>"]
            for key, value in list(props.items())[:5]:
                if value and str(value).strip():
                    popup_lines.append(f"{key}: {value}")
            popup_html = "<br>".join(popup_lines)

            try:
                if geom_type == 'Point':
                    coords = geom.get('coordinates', [])
                    if len(coords) >= 2:
                        folium.CircleMarker(
                            location=[coords[1], coords[0]],
                            radius=6,
                            color=color,
                            fill=True,
                            # folium's documented API uses snake_case kwargs
                            fill_color=color,
                            fill_opacity=0.7,
                            popup=popup_html
                        ).add_to(fg)

                elif geom_type in ['Polygon', 'MultiPolygon']:
                    folium.GeoJson(
                        feature,
                        # Bind color via default arg so each layer keeps its own
                        style_function=lambda x, c=color: {
                            'fillColor': c,
                            'color': c,
                            'weight': 2,
                            'fillOpacity': 0.4
                        },
                        popup=folium.Popup(popup_html, max_width=300)
                    ).add_to(fg)

                elif geom_type in ['LineString', 'MultiLineString']:
                    folium.GeoJson(
                        feature,
                        style_function=lambda x, c=color: {
                            'color': c,
                            'weight': 3
                        },
                        popup=folium.Popup(popup_html, max_width=300)
                    ).add_to(fg)
            except Exception as e:
                # A single malformed geometry must not break the whole map
                logger.debug(f"Error adding feature: {e}")
                continue

        fg.add_to(m)

    # Layer control only makes sense with multiple datasets
    if len(feature_data_list) > 1:
        folium.LayerControl().add_to(m)

    return m
532
+
533
+
534
def create_conservation_chart(feature_data_list, chart_type='bar'):
    """
    Build a Plotly figure summarizing conservation feature data.

    Args:
        feature_data_list: List of dicts with 'dataset' and 'features' keys.
        chart_type: 'bar' or 'pie'; anything else falls back to 'bar'.

    Returns:
        Plotly figure, or None when there is nothing to plot.
    """
    if not feature_data_list:
        return None

    # One row per dataset: truncated title, feature count, and view count.
    rows = [
        {
            'Dataset': entry.get('dataset', {}).get('title', 'Unknown')[:40],
            'Feature Count': len(entry.get('features', [])),
            'Views': entry.get('dataset', {}).get('views', 0),
        }
        for entry in feature_data_list
    ]
    if not rows:
        return None

    frame = pd.DataFrame(rows)

    if chart_type == 'pie':
        fig = px.pie(
            frame,
            values='Feature Count',
            names='Dataset',
            title='Conservation Features by Dataset',
            color_discrete_sequence=px.colors.qualitative.Set2
        )
    else:  # Default to bar
        fig = px.bar(
            frame,
            x='Dataset',
            y='Feature Count',
            color='Dataset',
            title='Conservation Features by Dataset',
            color_discrete_sequence=px.colors.qualitative.Set2
        )
        fig.update_layout(xaxis_tickangle=-45)

    fig.update_layout(
        template='plotly_white',
        showlegend=True,
        height=400
    )

    return fig
590
+
591
+
592
def fetch_conservation_features_for_query(parsed_query):
    """
    Fetch conservation features based on a parsed query.

    Returns:
        Tuple of (feature_data_list, status_message, metadata_list)
    """
    if not EXTERNAL_APIS_AVAILABLE or external_data is None:
        return [], "External APIs not available", []

    topic = parsed_query.get("conservation_topic", "forests")

    # Keyword -> GLOBIL topic mapping; first match in insertion order wins.
    query_keywords = {
        "deforestation": "forests",
        "forest": "forests",
        "wildlife": "wildlife",
        "endangered": "wildlife",
        "species": "wildlife",
        "marine": "oceans",
        "ocean": "oceans",
        "coral": "oceans",
        "protected": "wildlife",
        "river": "freshwater",
        "water": "freshwater",
        "climate": "climate",
        "carbon": "climate"
    }

    # Scan locations + indicators text for a topic keyword override.
    searchable = " ".join(
        parsed_query.get("locations", []) + parsed_query.get("indicators", [])
    ).lower()
    matched = next(
        (mapped for keyword, mapped in query_keywords.items() if keyword in searchable),
        None,
    )
    if matched is not None:
        topic = matched

    try:
        # Pull actual GeoJSON features from GLOBIL feature services
        feature_data = external_data.fetch_conservation_features(
            topic,
            max_datasets=3,
            max_features=200
        )

        if not feature_data:
            # No geometries available -- fall back to dataset metadata only
            datasets = external_data.search_conservation_data(topic, limit=10)
            return [], f"No feature data available for {topic}. Showing metadata only.", datasets

        total_features = sum(d.get('feature_count', 0) for d in feature_data)
        datasets_found = len(feature_data)

        return (
            feature_data,
            f"Found {total_features} features from {datasets_found} datasets about {topic}",
            [d.get('dataset', {}) for d in feature_data]
        )

    except Exception as e:
        logger.error(f"Error fetching conservation features: {e}")
        return [], f"Error fetching data: {str(e)}", []
652
+
653
+
654
  def fetch_geospatial_data(parsed_query):
655
  """
656
  Fetch and process geospatial data based on parsed query
 
1158
  # EXTERNAL API ROUTING: Conservation Data (WWF GLOBIL)
1159
  # =================================================================
1160
  if data_source == "conservation":
1161
+ # Try to fetch actual feature data first
1162
+ feature_data, status_msg, metadata = fetch_conservation_features_for_query(parsed)
 
 
 
 
 
 
 
 
 
1163
 
1164
+ map_html = None
1165
+ chart_fig = None
1166
+ map_file = None
1167
+ csv_file = None
1168
 
1169
+ if feature_data:
1170
+ # We have actual feature data - create map and chart
1171
+ cons_map = create_conservation_map(feature_data, map_style)
1172
+ if cons_map:
1173
+ map_html = cons_map._repr_html_()
1174
+ map_file = tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8')
1175
+ cons_map.save(map_file.name)
1176
+ map_file = map_file.name
1177
+
1178
+ # Create chart showing feature counts
1179
+ chart_fig = create_conservation_chart(feature_data, chart_type if chart_type in ['bar', 'pie'] else 'bar')
1180
+
1181
+ # Create table from feature properties
1182
+ table_rows = []
1183
+ for data in feature_data:
1184
+ dataset_title = data.get('dataset', {}).get('title', 'Unknown')
1185
+ for feat in data.get('features', [])[:50]: # Limit rows
1186
+ props = feat.get('properties', {})
1187
+ row = {'Dataset': dataset_title}
1188
+ # Get first few meaningful properties
1189
+ for key, val in list(props.items())[:4]:
1190
+ if val and str(val).strip() and key.lower() not in ['objectid', 'fid', 'shape']:
1191
+ row[key] = str(val)[:50]
1192
+ if len(row) > 1:
1193
+ table_rows.append(row)
1194
+
1195
+ if table_rows:
1196
+ cons_df = pd.DataFrame(table_rows)
1197
+ else:
1198
+ # Fallback to metadata table
1199
+ cons_df = create_conservation_table(metadata)
1200
+
1201
+ summary = f"🔍 **Query:** {user_query}\n\n"
1202
+ summary += f"📍 **Data Source:** WWF GLOBIL (ArcGIS Hub)\n"
1203
+ summary += f"🌿 **Topic:** {parsed.get('conservation_topic', 'General')}\n"
1204
+ summary += f"📊 **Results:** {status_msg}\n\n"
1205
+ summary += "🗺️ *Map shows conservation areas with colored layers*\n"
1206
+ summary += "📊 *Chart shows feature distribution across datasets*"
1207
+ else:
1208
+ # Fallback to metadata only (no feature data available)
1209
+ datasets, fallback_msg = fetch_conservation_data(parsed)
1210
+
1211
+ if not datasets:
1212
+ error_msg = f"❌ **Conservation Data Query Failed**\n\n{fallback_msg}\n\n"
1213
+ error_msg += "**Available Topics:**\n"
1214
+ error_msg += "- forests, wildlife, oceans, freshwater, climate\n\n"
1215
+ error_msg += "**Try queries like:**\n"
1216
+ error_msg += "- 'Search for deforestation datasets'\n"
1217
+ error_msg += "- 'Find wildlife conservation data'\n"
1218
+ error_msg += "- 'Show ocean protection datasets'"
1219
+ return None, None, None, error_msg, None, None
1220
+
1221
+ cons_df = create_conservation_table(datasets)
1222
+
1223
+ summary = f"🔍 **Query:** {user_query}\n\n"
1224
+ summary += f"📍 **Data Source:** WWF GLOBIL (ArcGIS Hub)\n"
1225
+ summary += f"🌿 **Topic:** {parsed.get('conservation_topic', 'General')}\n"
1226
+ summary += f"📊 **Results:** {status_msg or fallback_msg}\n\n"
1227
+ summary += "ℹ️ *Feature geometries not available for these datasets*\n"
1228
+ summary += "💡 *Showing dataset metadata table*"
1229
 
1230
+ # Save table to CSV
1231
+ if cons_df is not None and not cons_df.empty:
1232
+ csv_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w', encoding='utf-8')
1233
+ cons_df.to_csv(csv_file.name, index=False)
1234
+ csv_file = csv_file.name
1235
 
1236
+ return map_html, chart_fig, cons_df, summary, map_file, csv_file
1237
 
1238
  # =================================================================
1239
  # DEFAULT: Country/Region Data (Natural Earth)
data_utils.py CHANGED
@@ -445,7 +445,55 @@ class ExternalDataHandler:
445
  return []
446
 
447
  return self._globil.get_public_feature_layers(category)
448
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
  # =========================================================================
450
  # Utility Methods
451
  # =========================================================================
 
445
  return []
446
 
447
  return self._globil.get_public_feature_layers(category)
448
+
449
+ def fetch_conservation_features(
450
+ self,
451
+ topic: str,
452
+ max_datasets: int = 3,
453
+ max_features: int = 500
454
+ ) -> List[Dict]:
455
+ """
456
+ Fetch actual feature data from conservation datasets.
457
+
458
+ Args:
459
+ topic: Conservation topic to search
460
+ max_datasets: Maximum datasets to query
461
+ max_features: Maximum features per dataset
462
+
463
+ Returns:
464
+ List of dictionaries with dataset info and GeoJSON features
465
+ """
466
+ self._ensure_initialized()
467
+ if not self._globil:
468
+ return []
469
+
470
+ return self._globil.search_and_fetch_features(
471
+ topic,
472
+ max_datasets=max_datasets,
473
+ max_features_per_dataset=max_features
474
+ )
475
+
476
+ def get_conservation_dataset_features(
477
+ self,
478
+ item_id: str,
479
+ max_features: int = 1000
480
+ ) -> Optional[Dict]:
481
+ """
482
+ Get features from a specific conservation dataset.
483
+
484
+ Args:
485
+ item_id: ArcGIS item ID
486
+ max_features: Maximum features to return
487
+
488
+ Returns:
489
+ Dictionary with dataset info and GeoJSON features
490
+ """
491
+ self._ensure_initialized()
492
+ if not self._globil:
493
+ return None
494
+
495
+ return self._globil.get_dataset_with_features(item_id, max_features)
496
+
497
  # =========================================================================
498
  # Utility Methods
499
  # =========================================================================
external_apis.py CHANGED
@@ -474,6 +474,161 @@ class GlobILClient:
474
 
475
  return feature_layers
476
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
  # Conservation data topics available in GLOBIL
479
  GLOBIL_TOPICS = {
 
474
 
475
  return feature_layers
476
 
477
+ def get_feature_service_url(self, item_id: str) -> Optional[str]:
478
+ """
479
+ Get the Feature Service URL for an ArcGIS item.
480
+
481
+ Args:
482
+ item_id: ArcGIS item ID
483
+
484
+ Returns:
485
+ Feature Service URL or None
486
+ """
487
+ info = self.get_dataset_info(item_id)
488
+ if info and info.get("url"):
489
+ return info.get("url")
490
+ return None
491
+
492
+ def fetch_features(
493
+ self,
494
+ service_url: str,
495
+ layer_index: int = 0,
496
+ max_features: int = 1000,
497
+ where: str = "1=1",
498
+ out_fields: str = "*",
499
+ return_geometry: bool = True
500
+ ) -> Dict:
501
+ """
502
+ Fetch features from an ArcGIS Feature Service.
503
+
504
+ Args:
505
+ service_url: Base URL of the Feature Service
506
+ layer_index: Layer index (usually 0)
507
+ max_features: Maximum features to return
508
+ where: SQL where clause filter
509
+ out_fields: Fields to return (* for all)
510
+ return_geometry: Whether to include geometries
511
+
512
+ Returns:
513
+ GeoJSON-like dictionary with features
514
+ """
515
+ # Construct query URL
516
+ query_url = f"{service_url}/{layer_index}/query"
517
+
518
+ params = {
519
+ "where": where,
520
+ "outFields": out_fields,
521
+ "returnGeometry": str(return_geometry).lower(),
522
+ "f": "geojson", # Request GeoJSON format
523
+ "resultRecordCount": max_features
524
+ }
525
+
526
+ try:
527
+ response = self.session.get(
528
+ query_url,
529
+ params=params,
530
+ timeout=API_CONFIG.DEFAULT_TIMEOUT * 2 # Longer timeout for feature data
531
+ )
532
+ response.raise_for_status()
533
+ data = response.json()
534
+
535
+ # Handle ArcGIS error responses
536
+ if "error" in data:
537
+ logger.warning(f"ArcGIS error: {data['error'].get('message', 'Unknown error')}")
538
+ return {"type": "FeatureCollection", "features": []}
539
+
540
+ return data
541
+
542
+ except requests.exceptions.RequestException as e:
543
+ logger.error(f"Feature fetch error: {e}")
544
+ return {"type": "FeatureCollection", "features": []}
545
+
546
+ def search_and_fetch_features(
547
+ self,
548
+ topic: str,
549
+ max_datasets: int = 3,
550
+ max_features_per_dataset: int = 500
551
+ ) -> List[Dict]:
552
+ """
553
+ Search for datasets and fetch actual feature data.
554
+
555
+ Args:
556
+ topic: Conservation topic to search
557
+ max_datasets: Maximum datasets to query
558
+ max_features_per_dataset: Features per dataset
559
+
560
+ Returns:
561
+ List of dictionaries with dataset info and features
562
+ """
563
+ results = []
564
+
565
+ # Get feature layers for topic
566
+ layers = self.get_public_feature_layers(topic)[:max_datasets]
567
+
568
+ for layer in layers:
569
+ item_id = layer.get("id")
570
+ if not item_id:
571
+ continue
572
+
573
+ # Get feature service URL
574
+ service_url = self.get_feature_service_url(item_id)
575
+ if not service_url:
576
+ continue
577
+
578
+ # Fetch features
579
+ geojson = self.fetch_features(
580
+ service_url,
581
+ max_features=max_features_per_dataset
582
+ )
583
+
584
+ features = geojson.get("features", [])
585
+ if features:
586
+ results.append({
587
+ "dataset": layer,
588
+ "features": features,
589
+ "feature_count": len(features),
590
+ "service_url": service_url
591
+ })
592
+
593
+ return results
594
+
595
+ def get_dataset_with_features(
596
+ self,
597
+ item_id: str,
598
+ max_features: int = 1000
599
+ ) -> Optional[Dict]:
600
+ """
601
+ Get a specific dataset with its features.
602
+
603
+ Args:
604
+ item_id: ArcGIS item ID
605
+ max_features: Maximum features to return
606
+
607
+ Returns:
608
+ Dictionary with dataset info and features
609
+ """
610
+ info = self.get_dataset_info(item_id)
611
+ if not info:
612
+ return None
613
+
614
+ service_url = info.get("url")
615
+ if not service_url:
616
+ return None
617
+
618
+ geojson = self.fetch_features(service_url, max_features=max_features)
619
+
620
+ return {
621
+ "dataset": {
622
+ "id": item_id,
623
+ "title": info.get("title"),
624
+ "snippet": info.get("snippet"),
625
+ "type": info.get("type"),
626
+ "url": service_url
627
+ },
628
+ "geojson": geojson,
629
+ "feature_count": len(geojson.get("features", []))
630
+ }
631
+
632
 
633
  # Conservation data topics available in GLOBIL
634
  GLOBIL_TOPICS = {