Spaces:

UFOSINT
/

UAP-Data-Analysis-Tool

Sleeping

App Files Files Community

UAP-Data-Analysis-Tool / api /services /map_service.py

Ashoka74

Deploy current work to HF Space (slim)

a1aef88 29 days ago

Raw

History Blame Contribute Delete

7.64 kB

	"""
	map_service.py
	--------------
	Encapsulates all Kepler.gl HTML generation logic.

	Key design decisions:
	- Results are cached by a data fingerprint (hash of the DataFrame shape +
	the first/last five rows rendered as CSV). When the user changes filters the
	fingerprint changes and a new HTML payload is generated; older entries stay
	cached until they are evicted as the least recently used.
	- We keep at most `_MAX_CACHE_ENTRIES` cached payloads to bound memory
	use when many sessions are active.
	- The heavy `df.copy()` that previously lived inside the endpoint is
	eliminated: we only copy the minimal columns we actually need for the map.
	"""
	from __future__ import annotations

	import hashlib
	import logging
	import os
	import sys
	import traceback
	from collections import OrderedDict
	from typing import Optional

	import pandas as pd

	# Module-level logger, named after this module's import path.
	logger = logging.getLogger(__name__)

	# Upper bound on the number of HTML payloads kept in MapService's cache;
	# once this many are stored, the oldest entry is dropped before a new insert.
	_MAX_CACHE_ENTRIES = 20

	# ---------------------------------------------------------------------------
	# Internal helpers
	# ---------------------------------------------------------------------------

	def _fingerprint(df: pd.DataFrame) -> str:
	    """
	    Return a fast, deterministic fingerprint for a DataFrame.

	    The fingerprint is the MD5 hex digest of the frame's shape followed by a
	    small sample (the first and last 5 rows rendered as CSV, header
	    included), so the amount of text hashed does not grow with the total
	    number of rows.

	    Because only a sample is hashed, two frames with the same shape, the
	    same column names and identical first/last five rows get the same
	    fingerprint even if rows in between differ. MD5 is used here purely as
	    a cache key, not for security.

	    Args:
	        df: The DataFrame to fingerprint.

	    Returns:
	        A 32-character hexadecimal digest string.
	    """
	    sample = pd.concat([df.head(5), df.tail(5)])
	    # A plain "|" separates shape and sample. A backslash before the pipe
	    # would be an invalid string escape (a warning on current Python
	    # versions) and would add a stray backslash to the hashed text.
	    raw = f"{df.shape}|{sample.to_csv(index=False)}"
	    return hashlib.md5(raw.encode("utf-8", errors="replace")).hexdigest()


	def _zoom_for_span(span: float) -> int:
	    """
	    Map the larger of the latitude/longitude extents to a Kepler.gl zoom level.

	    Coarse step heuristic: the wider the data is spread, the lower the zoom.
	    Spans of 1 or less (and NaN, which fails every comparison) yield zoom 8.
	    """
	    # (exclusive lower bound of the span, zoom) pairs, widest first.
	    steps = ((100, 2), (30, 3), (10, 4), (5, 5), (2, 6), (1, 7))
	    for min_span, zoom in steps:
	        if span > min_span:
	            return zoom
	    return 8


	def _build_html(df: pd.DataFrame) -> str:
	    """
	    Build a Kepler.gl HTML document for a (possibly large) DataFrame.

	    Steps:
	    1. Load ``military_config.kgl`` (JSON) if it exists three directory
	       levels above this file.
	    2. If present alongside it, add ``global_power_plant_database.csv``
	       (rows whose ``primary_fuel`` is "Nuclear") and ``secret_bases.csv``
	       as auxiliary datasets, each with an ``icon`` column.
	    3. Run the input through ``auto_create_date_column`` and
	       ``sanitize_dataframe_for_json``, then add it as ``uap_sightings``.
	       When latitude and longitude columns are recognised by name, they are
	       coerced to numeric and rows lacking either coordinate are dropped.
	    4. If a config was loaded and coordinates remain, centre the map on the
	       mean coordinate with a zoom derived from the data spread.

	    Failures while reading the optional config/auxiliary files are logged
	    and otherwise ignored (best effort); the map is still produced.

	    Args:
	        df: Sightings data to plot.

	    Returns:
	        The HTML produced by ``KeplerGl._repr_html_()`` as a ``str``.
	    """
	    import json
	    from keplergl import KeplerGl  # noqa: PLC0415

	    # DECOUPLED: Use the new lightweight utils instead of map.py
	    from api.utils.data_utils import auto_create_date_column, sanitize_dataframe_for_json  # noqa: PLC0415

	    # Path setup: data files are looked up three levels above this module.
	    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
	    config_path = os.path.join(base_dir, "military_config.kgl")
	    bases_path = os.path.join(base_dir, "secret_bases.csv")
	    power_path = os.path.join(base_dir, "global_power_plant_database.csv")

	    kmap = KeplerGl(height=800)

	    # 1. Load configuration if available
	    config = None
	    if os.path.exists(config_path):
	        try:
	            with open(config_path, "r", encoding="utf-8") as f:
	                config = json.load(f)
	        except Exception as e:  # best effort: fall back to Kepler defaults
	            logger.error("Failed to load military config: %s", e)

	    # 2. Add Auxiliary Data (Power Plants & Secret Bases)
	    if os.path.exists(power_path):
	        try:
	            pp_df = pd.read_csv(power_path)
	            # Filter for nuclear as per config multiSelect
	            nuke_df = pp_df[pp_df["primary_fuel"] == "Nuclear"].copy()
	            nuke_df["icon"] = "control-on"  # Required for icon layer
	            kmap.add_data(data=nuke_df, name="nuclear_powerplants")
	        except Exception as e:  # best effort: the layer is simply omitted
	            logger.error("Failed to load power plant data: %s", e)

	    if os.path.exists(bases_path):
	        try:
	            bases_df = pd.read_csv(bases_path)
	            bases_df["icon"] = "draw-shape"
	            kmap.add_data(data=bases_df, name="secret_bases")
	        except Exception as e:  # best effort: the layer is simply omitted
	            logger.error("Failed to load secret bases: %s", e)

	    # 3. Add user's UAP data
	    df = auto_create_date_column(df)
	    df = sanitize_dataframe_for_json(df)

	    map_cols = list(df.columns)
	    lat_candidates = [c for c in map_cols if str(c).lower() in {"lat", "latitude", "city_latitude"}]
	    lon_candidates = [c for c in map_cols if str(c).lower() in {"lon", "lng", "longitude", "city_longitude"}]

	    if lat_candidates and lon_candidates:
	        lat_col = lat_candidates[0]
	        lon_col = lon_candidates[0]
	        # Keep every column, de-duplicated by label, in its original order.
	        # (A set-based selection would shuffle the columns differently from
	        # one process to the next.)
	        ordered_cols = list(dict.fromkeys(map_cols))
	        df_map = df[ordered_cols].copy()
	        df_map[lat_col] = pd.to_numeric(df_map[lat_col], errors="coerce")
	        df_map[lon_col] = pd.to_numeric(df_map[lon_col], errors="coerce")
	        df_map = df_map.dropna(subset=[lat_col, lon_col])

	        # 4. DYNAMIC VIEWPORT (Phase 14)
	        # Calculate center and zoom based on the current sightings data
	        if not df_map.empty:
	            lat_mean = float(df_map[lat_col].mean())
	            lon_mean = float(df_map[lon_col].mean())

	            # Simple zoom heuristic based on spread
	            lat_range = df_map[lat_col].max() - df_map[lat_col].min()
	            lon_range = df_map[lon_col].max() - df_map[lon_col].min()
	            zoom = _zoom_for_span(max(lat_range, lon_range))

	            if config:
	                map_state = config.setdefault("mapState", {})
	                map_state["latitude"] = lat_mean
	                map_state["longitude"] = lon_mean
	                map_state["zoom"] = zoom
	                logger.info(
	                    "Dynamically centering map at %.2f, %.2f (zoom=%s)",
	                    lat_mean,
	                    lon_mean,
	                    zoom,
	                )

	        # Data ID aligned with military_config.kgl
	        kmap.add_data(data=df_map, name="uap_sightings")
	    else:
	        # No recognisable coordinate columns: hand the frame over unchanged.
	        kmap.add_data(data=df, name="uap_sightings")

	    if config:
	        kmap.config = config

	    html_bytes = kmap._repr_html_()
	    # _repr_html_ may return bytes or str; callers always get str.
	    return html_bytes.decode("utf-8") if isinstance(html_bytes, bytes) else html_bytes


	# ---------------------------------------------------------------------------
	# Public service
	# ---------------------------------------------------------------------------

	class MapService:
	    """
	    Generates Kepler.gl HTML payloads and caches them at class level.

	    All state lives in the class attribute ``_cache`` and every method is a
	    classmethod, so the class is used directly rather than instantiated.
	    """

	    # fingerprint -> HTML payload, ordered from least to most recently used.
	    _cache: OrderedDict[str, str] = OrderedDict()

	    @classmethod
	    def get_or_generate(cls, df: pd.DataFrame) -> tuple[str, bool]:
	        """
	        Fetch the HTML for ``df`` from the cache, building it on a miss.

	        Returns:
	            ``(html_string, cache_hit)`` where ``cache_hit`` is True when
	            the payload came from the cache.
	        """
	        # The key covers only the data; the config file's mtime is not part
	        # of it (could be added if the config changes often).
	        store = cls._cache
	        fp = _fingerprint(df)

	        if fp not in store:
	            logger.info("MapService: cache MISS — generating HTML (key=%s…)", fp[:8])
	            payload = _build_html(df)

	            # Make room first so the cache never exceeds its size limit.
	            if len(store) >= _MAX_CACHE_ENTRIES:
	                oldest = next(iter(store))
	                del store[oldest]
	                logger.debug("MapService: evicted cache entry %s…", oldest[:8])

	            store[fp] = payload
	            return payload, False

	        # Hit: mark the entry as most recently used before handing it back.
	        store.move_to_end(fp)
	        logger.info("MapService: cache HIT (key=%s…)", fp[:8])
	        return store[fp], True

	    @classmethod
	    def invalidate(cls, df: Optional[pd.DataFrame] = None) -> None:
	        """
	        Drop the cached payload for ``df``, or everything when ``df`` is None.
	        """
	        if df is not None:
	            fp = _fingerprint(df)
	            cls._cache.pop(fp, None)
	            logger.info("MapService: cache entry %s… invalidated", fp[:8])
	            return

	        cls._cache.clear()
	        logger.info("MapService: full cache cleared")