|
|
import fsspec |
|
|
import pyarrow.parquet as pq |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
from io import BytesIO |
|
|
from rasterio.io import MemoryFile |
|
|
import matplotlib.pyplot as plt |
|
|
import cartopy.crs as ccrs |
|
|
import cartopy.io.img_tiles as cimgt |
|
|
from matplotlib.patches import Rectangle |
|
|
import math |
|
|
from matplotlib.figure import Figure |
|
|
from matplotlib.backends.backend_agg import FigureCanvasAgg |
|
|
|
|
|
|
|
|
def crop_center(img_array, cropx, cropy):
    """Return the central ``cropy`` x ``cropx`` window of an image array.

    Args:
        img_array: Array whose first two axes are (rows, columns). Any trailing
            axes (for example a channel axis) are kept untouched, so both 2-D
            and 3-D arrays are accepted.
        cropx: Width of the window, in pixels.
        cropy: Height of the window, in pixels.

    Returns:
        The sliced window. When the requested window is larger than the image
        along an axis, the whole axis is returned.
    """
    height, width = img_array.shape[:2]
    # Clamp at 0: a negative start would be interpreted by slicing as
    # "counted from the end" and silently return a misplaced sliver.
    start_x = max(0, width // 2 - cropx // 2)
    start_y = max(0, height // 2 - cropy // 2)
    return img_array[start_y:start_y + cropy, start_x:start_x + cropx]
|
|
|
|
|
def read_tif_bytes(tif_bytes):
    """Decode in-memory GeoTIFF bytes and return the pixel data.

    The bytes are opened through rasterio's ``MemoryFile`` with the GTiff
    driver, everything is read, and singleton axes are removed with
    ``squeeze()``.
    """
    with MemoryFile(tif_bytes) as memory_file, \
            memory_file.open(driver='GTiff') as dataset:
        pixels = dataset.read()
    return pixels.squeeze()
|
|
|
|
|
def read_row_memory(row_dict, columns=None):
    """Read selected columns of one record from a remote Parquet file.

    Args:
        row_dict: Mapping with the keys 'parquet_url' (location opened through
            fsspec) and 'parquet_row' (index passed to
            ``ParquetFile.read_row_group``).
        columns: Names of the columns to load. Defaults to ``["thumbnail"]``.

    Returns:
        dict mapping each requested column to its decoded value: the
        'thumbnail' column is opened as a PIL image, every other column is
        decoded with ``read_tif_bytes``.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    columns = ["thumbnail"] if columns is None else list(columns)

    url = row_dict['parquet_url']
    row_idx = row_dict['parquet_row']

    # Read-ahead caching with 5 MiB blocks for the remote file handle.
    fs_options = {
        "cache_type": "readahead",
        "block_size": 5 * 1024 * 1024,
    }

    with fsspec.open(url, mode='rb', **fs_options) as f:
        with pq.ParquetFile(f) as pf:
            table = pf.read_row_group(row_idx, columns=columns)

    row_output = {}
    for col in columns:
        # Only the first row of the row group is used.
        # NOTE(review): presumably each row group holds exactly one record - confirm.
        col_data = table[col][0].as_py()

        if col != 'thumbnail':
            row_output[col] = read_tif_bytes(col_data)
        else:
            row_output[col] = Image.open(BytesIO(col_data))

    return row_output
|
|
|
|
|
def download_and_process_image(product_id, df_source=None, verbose=True):
    """Fetch the B04/B03/B02 bands of a product and build RGB preview images.

    The product is looked up in ``df_source`` by its 'product_id' column, its
    'parquet_url' is redirected to modelscope.cn when it points at
    huggingface.co or hf-mirror.com, and the three bands are read with
    ``read_row_memory`` and stacked as an RGB array.

    Args:
        product_id: Value matched against ``df_source['product_id']``.
        df_source: DataFrame holding the product metadata; the first matching
            row is used.
        verbose: When true, progress and error messages are printed.

    Returns:
        ``(img_384, img_full)`` as PIL images, where ``img_384`` is the
        384x384 centre crop (or a resize when the image is smaller), or
        ``(None, None)`` on any failure.
    """
    if df_source is None:
        if verbose:
            print("❌ Error: No DataFrame provided.")
        return None, None

    row_subset = df_source[df_source['product_id'] == product_id]
    if len(row_subset) == 0:
        if verbose:
            print(f"❌ Error: Product ID {product_id} not found in DataFrame.")
        return None, None

    row_dict = row_subset.iloc[0].to_dict()

    # A missing cell typically arrives as NaN/None rather than a string; treat
    # it like an absent column instead of failing on the substring checks below.
    url = row_dict.get('parquet_url')
    if not isinstance(url, str) or not url:
        if verbose:
            print("❌ Error: 'parquet_url' missing in metadata.")
        return None, None

    # Redirect known hosts to modelscope.cn; only the first matching host is rewritten.
    for host in ('huggingface.co', 'hf-mirror.com'):
        if host in url:
            row_dict['parquet_url'] = (
                url.replace(f'https://{host}', 'https://modelscope.cn')
                .replace('resolve/main', 'resolve/master')
            )
            break

    if verbose:
        print(f"⬇️ Fetching data for {product_id} from {row_dict['parquet_url']}...")

    try:
        band_names = ['B04', 'B03', 'B02']
        bands_data = read_row_memory(row_dict, columns=band_names)

        if not all(b in bands_data for b in band_names):
            if verbose:
                print(f"❌ Error: Missing bands in fetched data for {product_id}")
            return None, None

        rgb_img = np.stack([bands_data[b] for b in band_names], axis=-1)

        if verbose:
            print(f"Raw RGB stats: Min={rgb_img.min()}, Max={rgb_img.max()}, Mean={rgb_img.mean()}, Dtype={rgb_img.dtype}")

        # Divide by 10000, apply a 2.5x gain and clip to [0, 1] before the
        # conversion to 8-bit.
        # NOTE(review): inputs that are already 8-bit (max <= 255) go through
        # the same scaling and come out nearly black - confirm whether they
        # should bypass it.
        rgb_norm = (2.5 * (rgb_img.astype(float) / 10000.0)).clip(0, 1)
        rgb_uint8 = (rgb_norm * 255).astype(np.uint8)

        if verbose:
            print(f"Processed RGB stats: Min={rgb_uint8.min()}, Max={rgb_uint8.max()}, Mean={rgb_uint8.mean()}")

        img_full = Image.fromarray(rgb_uint8)

        if rgb_uint8.shape[0] >= 384 and rgb_uint8.shape[1] >= 384:
            img_384 = Image.fromarray(crop_center(rgb_uint8, 384, 384))
        else:
            if verbose:
                print(f"⚠️ Image too small {rgb_uint8.shape}, resizing to 384x384.")
            img_384 = img_full.resize((384, 384))

        if verbose:
            print(f"✅ Successfully processed {product_id}")
        return img_384, img_full

    except Exception as e:
        if verbose:
            print(f"❌ Error processing {product_id}: {e}")
        # The traceback is printed regardless of `verbose`.
        import traceback
        traceback.print_exc()
        return None, None
|
|
|
|
|
|
|
|
class EsriImagery(cimgt.GoogleTiles):
    """Cartopy tile source that points at the Esri World Imagery map service."""

    def _image_url(self, tile):
        """Return the tile URL for an ``(x, y, z)`` tile tuple.

        The service path is ordered ``{z}/{y}/{x}``.
        """
        col, row, level = tile
        base = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile'
        return f'{base}/{level}/{row}/{col}'
|
|
|
|
|
from PIL import Image, ImageDraw, ImageFont |
|
|
|
|
|
def get_placeholder_image(text="Image Unavailable", size=(384, 384)):
    """Create a light-grey placeholder image carrying a text message.

    Args:
        text: Message drawn in black, starting 20 px from the left edge at
            half the image height.
        size: ``(width, height)`` of the image in pixels.

    Returns:
        A new RGB PIL image.
    """
    img = Image.new('RGB', size, color=(200, 200, 200))
    draw = ImageDraw.Draw(img)

    try:
        font = ImageFont.load_default()
    except Exception:
        # Best effort: fall back to font=None. Catching Exception instead of
        # using a bare except lets KeyboardInterrupt/SystemExit propagate.
        font = None

    draw.text((20, size[1] // 2), text, fill=(0, 0, 0), font=font)
    return img
|
|
|
|
|
def get_esri_satellite_image(lat, lon, score=None, rank=None, query=None):
    """
    Render an Esri World Imagery map centred on a coordinate as a PIL image.

    The map extends 0.05 degrees to each side of the point and is drawn from
    zoom-14 tiles. A yellow '+' marks the point and a red rectangle outlines a
    box of 384 * 10 metres per side (converted to degrees). The figure is built
    with Figure/FigureCanvasAgg directly rather than through pyplot.

    Args:
        lat: Latitude of the centre, in degrees.
        lon: Longitude of the centre, in degrees.
        score: Optional value shown in the title with four decimals.
        rank: Optional value shown in the title as "Rank <rank>".
        query: Optional text shown as the first title line when truthy.

    Returns:
        The rendered PNG opened as a PIL image. If anything raises, a message
        is printed and the image from get_placeholder_image is returned.
    """
    try:
        tiles = EsriImagery()
        lonlat = ccrs.PlateCarree()

        figure = Figure(figsize=(5, 5), dpi=100)
        # Instantiating the canvas attaches an Agg backend to the figure.
        FigureCanvasAgg(figure)
        axes = figure.add_subplot(1, 1, 1, projection=tiles.crs)

        half_span = 0.05
        axes.set_extent(
            [lon - half_span, lon + half_span, lat - half_span, lat + half_span],
            crs=lonlat,
        )
        axes.add_image(tiles, 14)

        # Centre marker.
        axes.plot(
            lon, lat,
            marker='+', color='yellow', markersize=12, markeredgewidth=2,
            transform=lonlat,
        )

        # Box outline: metres are converted to degrees using 111320 m per
        # degree, with the longitude span widened by 1 / cos(latitude).
        side_m = 384 * 10
        height_deg = (side_m / 111320)
        width_deg = (side_m / (111320 * math.cos(math.radians(lat))))
        corner = (lon - width_deg / 2, lat - height_deg / 2)
        outline = Rectangle(
            corner, width_deg, height_deg,
            linewidth=2, edgecolor='red', facecolor='none', transform=lonlat,
        )
        axes.add_patch(outline)

        heading = []
        if query:
            heading.append(f"{query}")
        if rank is not None:
            heading.append(f"Rank {rank}")
        if score is not None:
            heading.append(f"Score: {score:.4f}")
        axes.set_title("\n".join(heading), fontsize=10)

        png_buffer = BytesIO()
        figure.savefig(png_buffer, format='png', bbox_inches='tight')
        png_buffer.seek(0)
        return Image.open(png_buffer)

    except Exception as e:
        error_msg = str(e)
        network_markers = (
            "Connection reset by peer",
            "Network is unreachable",
            "urlopen error",
        )
        if any(marker in error_msg for marker in network_markers):
            print(f"⚠️ Network warning: Could not fetch Esri satellite map for ({lat:.4f}, {lon:.4f}). Server might be offline.")
        else:
            print(f"Error generating Esri image for {lat}, {lon}: {e}")

        return get_placeholder_image(f"Map Unavailable\n({lat:.2f}, {lon:.2f})")
|
|
|
|
|
def get_esri_satellite_image_url(lat, lon, zoom=14):
    """
    Returns the URL for the Esri World Imagery tile at the given location.

    The tile containing the point is located with the standard Web Mercator
    XYZ ("slippy map") tiling, and the URL uses the same ``{z}/{y}/{x}``
    template as ``EsriImagery._image_url``.

    Args:
        lat: Latitude in degrees; clamped to the Web Mercator limit
            (about +/-85.0511).
        lon: Longitude in degrees; wrapped into [-180, 180).
        zoom: Non-negative integer zoom level.

    Returns:
        The tile URL as a string, or None when the inputs cannot be turned
        into a valid tile (non-numeric, NaN/infinite, or negative zoom).
    """
    # Latitude at which the square Web Mercator map is cut off.
    max_lat = 85.0511287798066
    try:
        zoom = int(zoom)
        if zoom < 0:
            return None
        tiles_per_axis = 2 ** zoom

        lat = min(max(float(lat), -max_lat), max_lat)
        lon_fraction = ((float(lon) + 180.0) % 360.0) / 360.0
        lat_fraction = (1.0 - math.asinh(math.tan(math.radians(lat))) / math.pi) / 2.0

        x = int(lon_fraction * tiles_per_axis)
        y = int(lat_fraction * tiles_per_axis)
    except (TypeError, ValueError, OverflowError):
        # Unusable input: return None rather than raising.
        return None

    # Guard against floating-point results landing just outside the grid.
    x = min(max(x, 0), tiles_per_axis - 1)
    y = min(max(y, 0), tiles_per_axis - 1)

    return f'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{zoom}/{y}/{x}'
|
|
|