# NOTE: the lines originally here ("Spaces: / Sleeping / File size: 18,215 Bytes",
# commit hashes, and a run of line numbers) were Hugging Face Spaces page-scrape
# artifacts, not Python; converted to this comment so the file parses.
# -*- coding: utf-8 -*-
"""Updated_proto_KDE_saving.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1FaE0wh8yJYv3lxVbhyN4r9eHUNBHWAOX
"""
# ==============================================================================
# CELL 1: SETUP AND CONSOLIDATED IMPORTS
# ==============================================================================
import gradio as gr
import os
import json
import uuid
import shutil
import zipfile
import pathlib
import tempfile
import pandas as pd
import PIL.Image
from datetime import datetime
import huggingface_hub
import autogluon.multimodal
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors
import folium
from scipy.stats import gaussian_kde
from datasets import load_dataset
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
# ==============================================================================
# CELL 2: CORE LOGIC FOR TAB 1 (UNCHANGED)
# ==============================================================================
# --- Functions for Data Capture ---
def get_current_time():
    """Return the current local time as an ISO-8601 string."""
    now = datetime.now()
    return now.isoformat()
def handle_time_capture():
    """Grab the current time and format a status line for the UI.

    Returns a (status_markdown, iso_timestamp) pair; the timestamp is also
    written into the device-timestamp textbox by the click wiring.
    """
    captured = get_current_time()
    return f"π **Time Captured**: {captured}", captured
def get_gps_js():
    """Return the browser-side JS snippet that fills the GPS textboxes.

    The snippet queries the Geolocation API and writes lat/lon/accuracy and
    a timestamp into the textareas identified by elem_id (#lat, #lon,
    #accuracy, #device_ts), dispatching 'input' events so Gradio notices.
    """
    geolocation_snippet = """
    () => {
    if (!navigator.geolocation) { alert("Geolocation not supported"); return; }
    navigator.geolocation.getCurrentPosition(
    function(position) {
    const latBox = document.querySelector('#lat textarea');
    const lonBox = document.querySelector('#lon textarea');
    const accuracyBox = document.querySelector('#accuracy textarea');
    const timestampBox = document.querySelector('#device_ts textarea');
    if (latBox && lonBox && accuracyBox && timestampBox) {
    latBox.value = position.coords.latitude.toString();
    lonBox.value = position.coords.longitude.toString();
    accuracyBox.value = position.coords.accuracy.toString();
    timestampBox.value = new Date().toISOString();
    latBox.dispatchEvent(new Event('input', { bubbles: true }));
    lonBox.dispatchEvent(new Event('input', { bubbles: true }));
    accuracyBox.dispatchEvent(new Event('input', { bubbles: true }));
    timestampBox.dispatchEvent(new Event('input', { bubbles: true }));
    } else { alert("Error: Could not find GPS input fields"); }
    },
    function(err) { alert("GPS Error: " + err.message); },
    { enableHighAccuracy: true, timeout: 10000 }
    );
    }
    """
    return geolocation_snippet
def save_to_dataset(image, lat, lon, accuracy_m, device_ts):
    """Test-mode save handler: validates input and previews the record.

    Nothing is persisted ("Saving Disabled"). Returns a
    (status_markdown, preview_json_string) pair for the UI.
    """
    if image is None:
        return "β **Error**: Please capture or upload a photo first.", ""
    # Record mirroring the dataset schema; "image.jpg" is a placeholder name.
    mock_data = {
        "image": "image.jpg", "latitude": lat, "longitude": lon,
        "accuracy_m": accuracy_m, "device_timestamp": device_ts,
        "status": "Saving Disabled"
    }
    # Reconstructed from a mojibake-split literal in the original source
    # (a multi-byte emoji was garbled into "β" + newline, breaking syntax).
    status = "✅ **Test Save Successful!** (No data saved)"
    return status, json.dumps(mock_data, indent=2)
# Backwards-compatible aliases: the UI wiring below refers to these
# "placeholder_*" names, presumably kept from an earlier revision of the app.
placeholder_time_capture = handle_time_capture
placeholder_save_action = save_to_dataset
# --- Functions for Model Prediction ---
MODEL_REPO_ID = "ddecosmo/lanternfly_classifier"  # HF model repo with the zipped predictor
ZIP_FILENAME = "autogluon_image_predictor_dir.zip"  # artifact filename inside that repo
# Maps AutoGluon's integer class indices to human-readable labels.
CLASS_LABELS = {0: "Lanternfly", 1: "Other Insect", 2: "No Insect"}
CACHE_DIR = pathlib.Path("hf_assets")  # local download cache directory
EXTRACT_DIR = CACHE_DIR / "predictor_native"  # where the zip gets unpacked
PREDICTOR = None  # populated by the load attempt just below
def _prepare_predictor_dir():
    """Download and unpack the AutoGluon predictor archive from the Hub.

    Returns the path (str) to hand to MultiModalPredictor.load(): the single
    nested directory if the zip unpacked to exactly one, otherwise the
    extraction root itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    hf_token = os.getenv("HF_TOKEN", None)
    zip_path = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        token=hf_token,
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )
    # Start from a clean extraction directory on every run.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as archive:
        archive.extractall(str(EXTRACT_DIR))
    unpacked = list(EXTRACT_DIR.iterdir())
    if len(unpacked) == 1 and unpacked[0].is_dir():
        return str(unpacked[0])
    return str(EXTRACT_DIR)
# Load the predictor once at import time; on failure the UI degrades
# gracefully (PREDICTOR stays None and do_predict reports "Model not loaded").
try:
    PREDICTOR_DIR = _prepare_predictor_dir()
    PREDICTOR = autogluon.multimodal.MultiModalPredictor.load(PREDICTOR_DIR)
    # Reconstructed from a mojibake-split literal in the original source
    # (a multi-byte emoji was garbled into "β" + newline, breaking syntax).
    PREDICTOR_LOAD_STATUS = "✅ AutoGluon Predictor loaded successfully."
    print(PREDICTOR_LOAD_STATUS)
except Exception as e:
    PREDICTOR_LOAD_STATUS = f"β Failed to load AutoGluon Predictor: {e}"
    print(PREDICTOR_LOAD_STATUS)
    PREDICTOR = None
def do_predict(pil_img: PIL.Image.Image):
    """Classify a PIL image with the loaded AutoGluon predictor.

    Returns a (label_to_probability_dict, summary_string) pair. Both error
    paths now also return a 2-tuple: the original returned 3-tuples there,
    which mismatched the two output components wired in image_model_ui.
    """
    if PREDICTOR is None:
        return {"Error": 1.0}, "Model not loaded."
    if pil_img is None:
        return {"No Image": 1.0}, "No image provided."
    # predict_proba takes a DataFrame of image file paths, so the image is
    # written to a temp file; the context manager cleans it up afterwards
    # (the original leaked a mkdtemp directory per call).
    with tempfile.TemporaryDirectory() as tmpdir:
        img_path = pathlib.Path(tmpdir) / "input.png"
        pil_img.save(img_path)
        df = pd.DataFrame({"image": [str(img_path)]})
        proba_df = PREDICTOR.predict_proba(df).rename(columns=CLASS_LABELS)
    row = proba_df.iloc[0]
    pretty_dict = {label: float(row.get(label, 0.0)) for label in CLASS_LABELS.values()}
    confidence_info = ", ".join(f"{label}: {prob:.2f}" for label, prob in pretty_dict.items())
    return pretty_dict, confidence_info
# ==============================================================================
# CELL 3: CORE LOGIC FOR TAB 2 (KDE ANALYSIS)
# ==============================================================================
# Bounding box (lat/lon) of the CMU / Pittsburgh campus area shown on the maps.
pittsburgh_lat_min = 40.43950159029883
pittsburgh_lat_max = 40.44787067820301
pittsburgh_lon_min = -79.95054304624013
pittsburgh_lon_max = -79.93588847945053
def load_dataframe_from_huggingface():
    """Load the lanternfly sightings metadata from the public HF dataset.

    Returns a pandas DataFrame on success, or None on any failure (errors
    are printed rather than raised so the UI can degrade gracefully).
    """
    try:
        print("Loading data directly from Hugging Face dataset...")
        dataset = load_dataset("rlogh/lanternfly-data", data_files="metadata/entries.jsonl", split="train")
        df = dataset.to_pandas()
        # Reconstructed from a mojibake-split literal in the original source
        # (a multi-byte emoji was garbled into "β" + newline, breaking syntax).
        print("✅ Data successfully loaded into a DataFrame.")
        return df
    except Exception as e:
        print(f"β Error loading data from Hugging Face: {e}")
        return None
def calculate_kde_from_dataframe(df):
    """Fit a 2-D Gaussian KDE to the sighting coordinates in *df*.

    Expects 'latitude' and 'longitude' columns; rows with missing
    coordinates are ignored. Returns (latitudes, longitudes, kde, error):
    on success *error* is None; on failure the first three are None and
    *error* is a message string.

    Fix: the original called dropna(inplace=True) on the caller's
    DataFrame, silently mutating it; this version works on a copy.
    """
    try:
        if 'latitude' not in df.columns or 'longitude' not in df.columns:
            return None, None, None, "Error: DataFrame must contain 'latitude' and 'longitude' columns."
        clean = df.dropna(subset=['latitude', 'longitude'])
        latitudes = clean['latitude'].values
        longitudes = clean['longitude'].values
        # gaussian_kde expects shape (n_dims, n_points): row 0 = lon, row 1 = lat.
        coordinates = np.vstack([longitudes, latitudes])
        kde_object = gaussian_kde(coordinates)
        return latitudes, longitudes, kde_object, None
    except Exception as e:
        return None, None, None, f"Error calculating KDE from DataFrame: {e}"
import math
def find_hotspot_landmark(original_latitudes, original_longitudes, kde_object):
    """Locate the densest sighting and name the nearest campus landmark.

    Evaluates the fitted KDE at every observed sighting, takes the sighting
    with the highest density as the "hotspot", and returns a Markdown
    message naming the closest entry from a hand-curated list of CMU
    landmarks.

    Planar Euclidean distance on raw lat/lon is used deliberately: over an
    area as small as a campus the lon-scaling distortion is negligible.
    """
    # Hand-picked CMU campus landmarks: name -> (lat, lon).
    CAMPUS_LANDMARKS = {
        "Scaife Hall": (40.441742986804336, -79.94725195600002),
        "Hunt Library": (40.44097574857165, -79.94362666281333),
        "Cohon University Center": (40.44401378993309, -79.94172335009584),
        "Gates Hillman Complex": (40.4436463605335, -79.94442701667683),
        "Wean Hall": (40.44267896399903, -79.94582169457243),
        "Gesling Stadium": (40.443038206822905, -79.94038027450188),
        "The Fence": (40.44221744932438, -79.9435687098247)
    }
    # The hotspot is the observed point with the highest estimated density.
    all_coords = np.vstack([original_longitudes, original_latitudes])
    densities = kde_object(all_coords)
    hotspot_index = np.argmax(densities)
    hotspot = (original_latitudes[hotspot_index], original_longitudes[hotspot_index])
    # math.dist replaces the original hand-rolled sqrt-of-squares helper.
    closest_landmark = min(
        CAMPUS_LANDMARKS,
        key=lambda name: math.dist(hotspot, CAMPUS_LANDMARKS[name]),
    )
    return f"π **Hotspot Analysis**: The highest concentration was found closest to **{closest_landmark}** on campus."
def plot_kde_and_points_for_gradio(min_lat, max_lat, min_lon, max_lon, original_latitudes, original_longitudes, kde_object):
    """Render a static KDE heatmap PNG and an interactive Folium map.

    Returns (heatmap_png_path, folium_map_html). The static heatmap shows
    the KDE evaluated on a 100x100 grid over the bounding box; the Folium
    map shows each in-bounds sighting colored by its local density.
    """
    heatmap_path = "lanternfly_kde_heatmap.png"
    # --- Static heatmap: evaluate the KDE on a regular lon/lat grid. ---
    x, y = np.mgrid[min_lon:max_lon:100j, min_lat:max_lat:100j]
    positions = np.vstack([x.ravel(), y.ravel()])
    z = kde_object(positions).reshape(x.shape)
    # Guard against a flat density surface (division by zero).
    z_normalized = (z - z.min()) / (z.max() - z.min()) if z.max() > z.min() else np.zeros_like(z)
    fig, ax = plt.subplots(figsize=(8, 8))
    im = ax.imshow(z_normalized.T, origin='lower', extent=[min_lon, max_lon, min_lat, max_lat], cmap='hot', aspect='auto')
    fig.colorbar(im, ax=ax, label='Normalized Density (0-1)')
    ax.set_title('Lanternfly Sightings KDE Heatmap (Static)')
    plt.savefig(heatmap_path, bbox_inches='tight')
    plt.close(fig)
    # --- Interactive map: one circle marker per in-bounds sighting. ---
    m_colored_points = folium.Map()
    m_colored_points.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])
    original_coordinates = np.vstack([original_longitudes, original_latitudes])
    # Evaluate the KDE once for all points; the original re-evaluated it
    # per point inside the loop just for the tooltip value.
    density_at_points = kde_object(original_coordinates)
    density_normalized_for_color = (density_at_points - density_at_points.min()) / (density_at_points.max() - density_at_points.min() + 1e-9)
    max_density = density_at_points.max()
    # cm.get_cmap() was removed in Matplotlib 3.9; plt.get_cmap() is the
    # supported equivalent.
    colormap = plt.get_cmap('viridis')
    for lat, lon, density_norm_color, raw_density in zip(
            original_latitudes, original_longitudes,
            density_normalized_for_color, density_at_points):
        if min_lat <= lat <= max_lat and min_lon <= lon <= max_lon:
            color = matplotlib.colors.rgb2hex(colormap(density_norm_color))
            # Tooltip keeps the original raw/max normalization (distinct
            # from the min-max normalization used for the marker color).
            normalized_tooltip_density = raw_density / max_density if max_density > 0 else 0
            folium.CircleMarker(
                location=[lat, lon], radius=5, color=color, fill=True,
                fill_color=color, fill_opacity=0.7,
                tooltip=f"Normalized Density: {normalized_tooltip_density:.4f}"
            ).add_to(m_colored_points)
    return heatmap_path, m_colored_points._repr_html_()
import joblib # Make sure this is imported at the top
def load_kde_from_hub():
    """Download the pre-trained KDE model from the Hugging Face Hub.

    Returns the deserialized KDE object, or None on any failure (errors
    are printed rather than raised so the UI can degrade gracefully).
    """
    try:
        print("Downloading pre-trained KDE model...")
        model_path = huggingface_hub.hf_hub_download(
            repo_id="ddecosmo/lanternfly-kde-model",  # same repo_id as the upload script
            filename="kde_model.joblib",
            repo_type="model"
        )
        kde_model = joblib.load(model_path)
        # Reconstructed from a mojibake-split literal in the original source
        # (a multi-byte emoji was garbled into "β" + newline, breaking syntax).
        print("✅ Pre-trained KDE model loaded.")
        return kde_model
    except Exception as e:
        print(f"β Failed to load KDE model from Hub: {e}")
        return None
def run_full_analysis_and_update_ui():
    """Build every output for the KDE tab from the pre-trained model.

    Loads the pre-fitted KDE and the raw sightings, renders the static
    heatmap, the interactive map, and the hotspot message, and returns
    Gradio component updates in (Image, HTML, Markdown) order.
    """
    kde_model = load_kde_from_hub()
    sightings = load_dataframe_from_huggingface()
    # Either load failing means there is nothing useful to show.
    if kde_model is None or sightings is None:
        return (
            gr.Image(visible=False),
            gr.HTML("<h3>Error: Could not load model or data from Hub.</h3>", visible=True),
            gr.Markdown(visible=False),
        )
    # Raw coordinates are still needed to draw the individual Folium points.
    lat_values = sightings['latitude'].values
    lon_values = sightings['longitude'].values
    print("Generating visualizations with pre-trained model...")
    heatmap_path, interactive_map_html = plot_kde_and_points_for_gradio(
        pittsburgh_lat_min, pittsburgh_lat_max,
        pittsburgh_lon_min, pittsburgh_lon_max,
        lat_values, lon_values, kde_model,
    )
    print("Finding hotspot landmark...")
    hotspot_message = find_hotspot_landmark(lat_values, lon_values, kde_model)
    return (
        gr.Image(value=heatmap_path, visible=True),
        gr.HTML(value=interactive_map_html, visible=True),
        gr.Markdown(value=hotspot_message, visible=True),
    )
# ==============================================================================
# CELL 4: GRADIO UI DEFINITIONS
# ==============================================================================
def field_capture_ui(camera):
    """Render the data-logging panel: GPS/time capture plus a test-mode save.

    *camera* is the shared gr.Image component from the Capture tab; its
    current value is forwarded to the save handler. The elem_ids on the
    textboxes (#lat, #lon, #accuracy, #device_ts) are matched by the JS
    returned from get_gps_js(), so they must stay in sync with it.
    """
    # NOTE(review): nesting a fresh gr.Blocks() inside the outer app Blocks
    # is unusual; gr.Column()/gr.Group() is the conventional container —
    # confirm this renders as intended.
    with gr.Blocks():
        # NOTE(review): "#Lanternfly" likely needs a space after '#' to
        # render as a Markdown heading — confirm in the UI.
        gr.Markdown("#Lanternfly Data Logging")
        with gr.Column(scale=1):
            gr.Markdown("### π Location Data")
            gps_btn = gr.Button("π Get GPS", variant="primary")
            with gr.Row():
                lat_box = gr.Textbox(label="Latitude", interactive=True, value="0.0", elem_id="lat")
                lon_box = gr.Textbox(label="Longitude", interactive=True, value="0.0", elem_id="lon")
            with gr.Row():
                accuracy_box = gr.Textbox(label="Accuracy (meters)", interactive=True, value="0.0", elem_id="accuracy")
                device_ts_box = gr.Textbox(label="Device Timestamp", interactive=True, elem_id="device_ts")
            time_btn = gr.Button("π Get Current Time")
            save_btn = gr.Button("πΎ Save (Test Mode)")
            status = gr.Markdown("π **Ready**")
            preview = gr.JSON(label="Preview JSON")
        # GPS runs entirely client-side (fn=None, js=...); the JS fills the
        # textboxes via their elem_ids above.
        gps_btn.click(fn=None, inputs=[], outputs=[], js=get_gps_js())
        time_btn.click(fn=placeholder_time_capture, inputs=[], outputs=[status, device_ts_box])
        save_btn.click(fn=placeholder_save_action, inputs=[camera, lat_box, lon_box, accuracy_box, device_ts_box], outputs=[status, preview])
def image_model_ui(image_in):
    """Render the classification panel wired to the shared image input.

    Shows class probabilities and a one-line summary whenever *image_in*
    changes, plus a gallery of clickable example images (expected to live
    next to the app script).
    """
    with gr.Blocks():
        gr.Markdown("# Image Classification Results")
        gr.Markdown("Uses an EfficientNetB1 model to classify the uploaded image.")
        # Surface the startup load failure in the UI rather than silently
        # returning "Model not loaded." on every prediction.
        if PREDICTOR is None:
            gr.Warning(PREDICTOR_LOAD_STATUS)
        with gr.Row():
            label_display = gr.Label(num_top_classes=2, label="Class Probabilities")
            summary_box = gr.Textbox(label="Prediction Summary")
        # Re-classify every time the shared image component changes.
        image_in.change(
            fn=do_predict,
            inputs=[image_in],
            outputs=[label_display, summary_box],
        )
        # Example images covering true positives and known edge cases.
        example_files = [
            "Lanternfly_1.jpg",
            "Lanternfly_2.jpg",
            "Lanternfly_3.jpg",
            "Lanternfly_4.jpg",
            "Arts_Building.jpg",
            "Empty_1.jpg",
            "Empty_2.jpg",
            "Ladybug.jpg",
            "Ant.jpg",
        ]
        gr.Examples(
            examples=example_files,
            inputs=[image_in],
            label="Click an Example to Classify",
            examples_per_page=3,
        )
def kde_analysis_ui():
    """Build the KDE tab: intro text, a trigger button, and hidden outputs.

    The heatmap, interactive map, and hotspot message start invisible and
    are revealed by the button's callback.
    """
    # Intro text and the single control, rendered above the outputs.
    gr.Markdown("# Spotted Lanternfly Kernel Density Estimation Analysis")
    gr.Markdown("Click the button to generate a Kernel Density Estimation (KDE) analysis based on the data gathered from the classification tab.")
    gr.Markdown("This data can be found at rlogh/lanternfly-data on Hugging Face and contains images, geolocal, and temporal data for all samples.")
    gr.Markdown("This dataset is public and available for use for any research or learning purposes.")
    generate_btn = gr.Button("Generate KDE Visualizations")
    # Output placeholders, hidden until an analysis has run.
    hotspot_md = gr.Markdown(visible=False)
    with gr.Row():
        heatmap_img = gr.Image(label="KDE Heatmap (Static)", visible=False)
        density_map = gr.HTML(label="Interactive Density Map", visible=False)
    # One click runs the whole pipeline and reveals all three outputs.
    generate_btn.click(
        fn=run_full_analysis_and_update_ui,
        inputs=None,
        outputs=[heatmap_img, density_map, hotspot_md],
    )
# Top-level app layout: two tabs sharing a single image input component.
with gr.Blocks(title="Unified Lanternfly App") as app:
    gr.Markdown("# Lanternfly Tracker")
    gr.Markdown("This application allows for the tracking of concentrated lanternflies, mainly around Carnegie Mellon University.")
    gr.Markdown("It combines two tools: (1) A field capture and AI Image classifer for identifying lanternflies, and (2) a Kernel Density Estimation (KDE) ML model to visualize lanternfly hotspots on campus.")
    gr.Markdown("Photos can be taken and classified as Lanternflies in the Capture & Classification tab. In future this data can be saved in real time to the dataset")
    gr.Markdown("To view the overal distribution of lanternflies based on collected data, use the Spatial Analysis (KDE) tab.")
    gr.Markdown("The image classifier does make mistake on edge cases, these are included in the examples.")
    # TAB 1: (Unchanged)
    with gr.Tab("Capture & Classification"):
        gr.Info("GPS functionality is now enabled! Data saving is in test mode.")
        # One gr.Image shared by both panels: the classifier listens to its
        # .change event and the field-capture panel reads it on save.
        shared_image_input = gr.Image(
            streaming=False, height=380, label="π· Upload Photo (or use camera)",
            type="pil", sources=["webcam", "upload"]
        )
        with gr.Row():
            with gr.Column(scale=1):
                image_model_ui(shared_image_input)
            with gr.Column(scale=1):
                field_capture_ui(shared_image_input)
    # TAB 2: KDE ANALYSIS (Simplified and Corrected)
    with gr.Tab("Spatial Analysis (KDE)"):
        # This single function call now builds the entire tab correctly.
        kde_analysis_ui()
# Launch the app
if __name__ == "__main__":
    app.launch()