VoyagerXvoyagerx committed on
Commit
29fab93
·
0 Parent(s):

sync from hf

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +54 -0
  2. .gitignore +10 -0
  3. MajorTOM/MajorTOMDataset.py +64 -0
  4. MajorTOM/__init__.py +5 -0
  5. MajorTOM/embedder/MajorTOM_Embedder.py +191 -0
  6. MajorTOM/embedder/__init__.py +2 -0
  7. MajorTOM/embedder/grid_cell_fragment.py +164 -0
  8. MajorTOM/embedder/models/DINOv2_S2RGB.py +91 -0
  9. MajorTOM/embedder/models/SSL4EO_S1RTC.py +125 -0
  10. MajorTOM/embedder/models/SSL4EO_S2L1C.py +97 -0
  11. MajorTOM/embedder/models/SigLIP_S2RGB.py +65 -0
  12. MajorTOM/embedder/models/__init__.py +4 -0
  13. MajorTOM/extras/coverage-example.png +3 -0
  14. MajorTOM/extras/coverage_vis.py +149 -0
  15. MajorTOM/extras/extract-sample-from-raw-S2.ipynb +0 -0
  16. MajorTOM/extras/thumbnail_dem.py +77 -0
  17. MajorTOM/extras/thumbnail_s1rtc.py +80 -0
  18. MajorTOM/extras/thumbnail_s2.py +68 -0
  19. MajorTOM/grid.py +284 -0
  20. MajorTOM/metadata_helpers.py +159 -0
  21. MajorTOM/sample_helpers.py +20 -0
  22. README.md +28 -0
  23. Tutorial.md +162 -0
  24. Tutorial_zh.md +157 -0
  25. app.py +792 -0
  26. configs/huggingface.yaml +12 -0
  27. countries.geo.json +0 -0
  28. data_utils.py +223 -0
  29. embedding_datasets/grid_sample_center_22k_FarSLIP_384x384.parquet +3 -0
  30. embedding_datasets/grid_sample_center_22k_SatCLIP_384x384.parquet +3 -0
  31. embedding_datasets/grid_sample_center_22k_SigLIP_384x384.parquet +3 -0
  32. embedding_datasets/grid_sample_metadata.parquet +3 -0
  33. embedding_datasets/zhejiang_sample_center_2k_FarSLIP_384x384.parquet +3 -0
  34. embedding_datasets/zhejiang_sample_center_2k_SatCLIP_384x384.parquet +3 -0
  35. embedding_datasets/zhejiang_sample_center_2k_SigLIP_384x384.parquet +3 -0
  36. embedding_datasets/zhejiang_sample_metadata.parquet +3 -0
  37. examples/example1.png +3 -0
  38. examples/example2.png +3 -0
  39. examples/example3.png +3 -0
  40. images/CLIP.png +3 -0
  41. images/Image_Search_Amazon.jpg +3 -0
  42. images/Image_Search_Middle_East.jpg +3 -0
  43. images/Location_Search_Amazon.jpg +3 -0
  44. images/Location_Search_Hangzhou.jpg +3 -0
  45. images/Text_Search.jpg +3 -0
  46. images/embedding.png +3 -0
  47. images/framework_en.png +3 -0
  48. images/framework_zh.png +3 -0
  49. images/samples.png +3 -0
  50. models/FarSLIP/.gitignore +160 -0
.gitattributes ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *.tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.db* filter=lfs diff=lfs merge=lfs -text
29
+ *.ark* filter=lfs diff=lfs merge=lfs -text
30
+ **/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
31
+ **/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
32
+ **/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
33
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
34
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
35
+ *.gguf* filter=lfs diff=lfs merge=lfs -text
36
+ *.ggml filter=lfs diff=lfs merge=lfs -text
37
+ *.llamafile* filter=lfs diff=lfs merge=lfs -text
38
+ *.pt2 filter=lfs diff=lfs merge=lfs -text
39
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
40
+ *.npy filter=lfs diff=lfs merge=lfs -text
41
+ *.npz filter=lfs diff=lfs merge=lfs -text
42
+ *.pickle filter=lfs diff=lfs merge=lfs -text
43
+ *.pkl filter=lfs diff=lfs merge=lfs -text
44
+ *.tar filter=lfs diff=lfs merge=lfs -text
45
+ *.wasm filter=lfs diff=lfs merge=lfs -text
46
+ *.zst filter=lfs diff=lfs merge=lfs -text
47
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
48
+ ViT-SO400M-14-SigLIP-384/open_clip_pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
49
+ center_bbx_22k.parquet filter=lfs diff=lfs merge=lfs -text
50
+ embedding_datasets/center_bbx_22k_SigLIP_384x384.parquet filter=lfs diff=lfs merge=lfs -text
51
+ embedding_datasets/center_bbx_22k_FarSLIP_384x384.parquet filter=lfs diff=lfs merge=lfs -text
52
+ *.png filter=lfs diff=lfs merge=lfs -text
53
+ *.jpg filter=lfs diff=lfs merge=lfs -text
54
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ .gradio/
5
+ .vscode/
6
+ .DS_Store
7
+ checkpoints/
8
+ models/FarSLIP/assets
9
+ models/SatCLIP/figures
10
+ configs/local.yaml
MajorTOM/MajorTOMDataset.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import torch
4
+ from torch.utils.data import Dataset
5
+ from pathlib import Path
6
+ import rasterio as rio
7
+ from PIL import Image
8
+ import torchvision.transforms as transforms
9
+
10
class MajorTOM(Dataset):
    """MajorTOM Dataset (https://huggingface.co/Major-TOM)

    Reads per-band .tif and .png files for each sample listed in the metadata
    dataframe, laid out on disk as
    ``<local_dir>/<row>/<grid_cell>/<product_id>/<band>.<ext>``.

    Args:
        df ((geo)pandas.DataFrame): Metadata dataframe.
        local_dir (str or pathlib.Path): Root directory of the local dataset version.
        tif_bands (str or sequence): tif file names (without extension) to read.
        png_bands (str or sequence): png file names (without extension) to read.
        tif_transforms (list or None): Transforms composed and applied to each tif
            band; None disables them. Defaults to ``[transforms.ToTensor()]``.
        png_transforms (list or None): Transforms composed and applied to each png
            band; None disables them. Defaults to ``[transforms.ToTensor()]``.
    """

    # Sentinel default so callers can still pass None to explicitly disable
    # transforms, while avoiding mutable default arguments (the original
    # defaults were shared lists with a single ToTensor() built at import time).
    _DEFAULT = object()

    def __init__(self,
                 df,
                 local_dir=None,
                 tif_bands=('B04', 'B03', 'B02'),
                 png_bands=('thumbnail',),
                 tif_transforms=_DEFAULT,
                 png_transforms=_DEFAULT,
                 ):
        super().__init__()
        self.df = df
        self.local_dir = Path(local_dir) if isinstance(local_dir, str) else local_dir

        # Accept either a single band name or a sequence of names.
        self.tif_bands = [tif_bands] if isinstance(tif_bands, str) else list(tif_bands)
        self.png_bands = [png_bands] if isinstance(png_bands, str) else list(png_bands)

        # Build the default transform lists lazily, per instance.
        if tif_transforms is MajorTOM._DEFAULT:
            tif_transforms = [transforms.ToTensor()]
        if png_transforms is MajorTOM._DEFAULT:
            png_transforms = [transforms.ToTensor()]
        self.tif_transforms = transforms.Compose(tif_transforms) if tif_transforms is not None else None
        self.png_transforms = transforms.Compose(png_transforms) if png_transforms is not None else None

    def __len__(self):
        """Number of samples (rows in the metadata dataframe)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load all requested bands for sample ``idx``.

        Returns:
            dict: ``{'meta': <metadata row>, <band name>: <tensor/image>, ...}``
        """
        meta = self.df.iloc[idx]

        # Directory layout: <row>/<grid_cell>/<product_id>
        row = meta.grid_cell.split('_')[0]
        path = self.local_dir / row / meta.grid_cell / meta.product_id

        out_dict = {'meta': meta}

        for band in self.tif_bands:
            with rio.open(path / '{}.tif'.format(band)) as src:
                out = src.read()
            if self.tif_transforms is not None:
                out = self.tif_transforms(out)
            out_dict[band] = out

        for band in self.png_bands:
            # Use a context manager so the underlying file handle is closed
            # (PIL keeps it open lazily otherwise); load() pulls the pixel
            # data in before the file is closed.
            with Image.open(path / '{}.png'.format(band)) as img:
                img.load()
                out = img if self.png_transforms is None else self.png_transforms(img)
            out_dict[band] = out

        return out_dict
MajorTOM/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from .sample_helpers import *
2
+ from .metadata_helpers import *
3
+ from .MajorTOMDataset import *
4
+ from .grid import *
5
+ from .embedder import *
MajorTOM/embedder/MajorTOM_Embedder.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import geopandas as gpd
3
+ import hashlib
4
+ from rasterio.io import MemoryFile
5
+
6
+ from .grid_cell_fragment import *
7
+ from .models import *
8
+ import cv2
9
+
10
class MajorTOM_Embedder(torch.nn.Module):
    """Applies an embedding model to fragments of a Major-TOM grid cell.

    The input raster is fragmented into overlapping tiles sized for the wrapped
    embedder, each tile is embedded, and the result is returned as a
    GeoDataFrame with one row per fragment (embedding + spatial metadata).

    Attributes:
        embedder: Model producing an embedding for a batch of fragments; must
            expose ``bands`` (input band names) and ``size`` (input H, W).
        frag_params (dict): Fragmentation settings (fragment_size,
            target_overlap, border_shift) forwarded to ``fragment_fn``.
        column_types (dict): dtype conversions applied to the output
            GeoDataFrame columns (unlisted columns need no conversion).
    """

    def __init__(self, embedder, target_overlap=0.1, border_shift=True):
        """
        Args:
            embedder (torch.nn.Module): Model that generates embeddings for
                image fragments.
            target_overlap (float): Target fractional overlap between
                neighbouring fragments. Default is 0.1.
            border_shift (bool): Shift the outermost fragments to the image
                border so the full extent is covered. Default is True.
        """
        super().__init__()

        self.embedder = embedder

        # Fragmentation settings (square fragments sized to the embedder input).
        self.frag_params = {
            'fragment_size': self.embedder.size[0],
            'target_overlap': target_overlap,
            'border_shift': border_shift,
        }

        # dtype conversions for the output dataframe; columns not listed
        # (unique_id, embedding, timestamp, product_id, grid_cell,
        # utm_footprint, utm_crs, pixel_bbox) need no conversion.
        self.column_types = {
            'grid_row_u': 'int16',
            'grid_col_r': 'int16',
            'centre_lat': 'float32',
            'centre_lon': 'float32',
        }

    def bands(self):
        """Return the list of input bands, in the order the embedder expects."""
        return self.embedder.bands

    def size(self):
        """Return the embedder's expected input size as (height, width)."""
        return self.embedder.size

    def calculate_checksum(self, geometry, timestamp, product_id, embedding):
        """Return a SHA-256 hex digest identifying one fragment.

        The digest is computed over the string concatenation
        ``"{geometry}_{timestamp}_{product_id}_{embedding}"``.

        Args:
            geometry (shapely.geometry): Fragment footprint.
            timestamp (str): Acquisition timestamp.
            product_id (str): Product identifier.
            embedding (np.ndarray): Fragment embedding.

        Returns:
            str: SHA-256 checksum of the combined inputs.
        """
        combined = f"{geometry}_{timestamp}_{product_id}_{embedding}"
        return hashlib.sha256(combined.encode()).hexdigest()

    def _read_image(self, row):
        """Read all embedder bands from ``row`` into a single tensor.

        Bands are stored as in-memory GeoTIFF blobs (pyarrow binary cells);
        mismatched resolutions are upsampled (nearest neighbour) to the
        largest band resolution.

        Args:
            row (pandas.Series): Row holding one GeoTIFF blob per band.

        Returns:
            tuple: (float32 image tensor of shape (H, W, C), footprint polygon
            in the raster CRS, rasterio CRS of the raster).
        """
        img = []
        for band in self.embedder.bands:
            with MemoryFile(row[band][0].as_py()) as mem_f:
                with mem_f.open(driver='GTiff') as f:
                    crs = f.crs
                    footprint = box(*f.bounds)
                    img.append(f.read()[0])

        # Optional upsampling when bands come at different resolutions.
        shapes = [layer.shape for layer in img]
        if any(el != shapes[0] for el in shapes):
            h = max(el[0] for el in shapes)
            w = max(el[1] for el in shapes)
            for layer_idx, layer in enumerate(img):
                if layer.shape != (h, w):
                    # BUG FIX: cv2.resize expects dsize as (width, height);
                    # the original passed (h, w), which transposes the target
                    # size for non-square rasters.
                    img[layer_idx] = cv2.resize(layer, (w, h), interpolation=cv2.INTER_NEAREST)

        img = torch.from_numpy(np.stack(img, -1).astype(np.float32))
        return img, footprint, crs

    def forward(self, row, row_meta, device='cuda'):
        """Embed every fragment of one grid-cell sample.

        Reads the image, fragments it, computes an embedding per fragment and
        packs the result into a GeoDataFrame.

        Args:
            row (pandas.Series): Row with the raw image data per band.
            row_meta (pandas.Series): Sample metadata (timestamp, product_id,
                grid-cell identifiers).
            device (str): Device to run the embedder on ('cpu' or 'cuda').

        Returns:
            geopandas.GeoDataFrame: One row per fragment with its embedding
            and spatial metadata.
        """
        img, footprint, crs = self._read_image(row)

        # Fragment the sample into overlapping tiles plus pixel offsets.
        fragments, xys = fragment_fn(img, **self.frag_params, return_indices=True, verbose=False)
        nrows, ncols, c, h, w = fragments.shape

        with torch.no_grad():
            embeddings = self.embedder(fragments.reshape(-1, c, h, w).to(device)).view(nrows, ncols, -1)

        # Loop-invariant values, hoisted out of the per-fragment loop.
        transformer = Transformer.from_crs(crs, CRS.from_epsg(4326), always_xy=True)
        timestamp = row_meta.timestamp.item()
        product_id = row_meta.product_id.item()
        grid_cell = row_meta.grid_cell.item()
        grid_row_u = row_meta.grid_row_u.item()
        grid_col_r = row_meta.grid_col_r.item()

        df_rows = []
        for r_idx in range(nrows):
            for c_idx in range(ncols):
                embedding = embeddings[r_idx, c_idx].cpu().numpy()
                # Spatial features per fragment.
                x_offset, y_offset = xys[r_idx, c_idx].int().tolist()
                pixel_bbox = [x_offset, y_offset, x_offset + h, y_offset + w]  # in pixels
                utm_footprint = crop_footprint(footprint, *img.shape[:2], pixel_bbox)
                # The main footprint is WGS84 (must be consistent across parquet).
                geometry = transform(transformer.transform, utm_footprint)
                centre_lon, centre_lat = geometry.centroid.coords[0]

                df_rows.append({
                    'unique_id': self.calculate_checksum(geometry, timestamp, product_id, embedding),
                    'embedding': embedding,
                    'timestamp': timestamp,
                    'product_id': product_id,
                    'grid_cell': grid_cell,
                    'grid_row_u': grid_row_u,
                    'grid_col_r': grid_col_r,
                    'geometry': geometry,
                    'centre_lat': centre_lat,
                    'centre_lon': centre_lon,
                    'utm_footprint': utm_footprint.wkt,
                    'utm_crs': crs.to_string(),
                    'pixel_bbox': pixel_bbox,
                })

        return gpd.GeoDataFrame(df_rows).astype(self.column_types)
MajorTOM/embedder/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .MajorTOM_Embedder import *
2
+ from .grid_cell_fragment import *
MajorTOM/embedder/grid_cell_fragment.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import numpy as np
3
+ import torch
4
+ from shapely.ops import transform
5
+ from pyproj import CRS, Transformer
6
+ import geopandas as gpd
7
+ import pandas as pd
8
+ import numpy as np
9
+ from shapely.geometry import Polygon, box
10
+ from rasterio.transform import from_bounds, xy
11
+ #from rasterio.windows import Window, from_bounds
12
+ import rasterio as rio
13
+
14
def crop_footprint(footprint, height, width, crop_bbox):
    """Crop a georeferenced footprint to a pixel-space bounding box.

    Args:
        footprint (shapely.geometry.Polygon): Footprint of the full image/area.
        height (int): Image height in pixels.
        width (int): Image width in pixels.
        crop_bbox (list): Pixel bounding box [col_start, row_start, col_end, row_end],
            where (col_start, row_start) is the top-left corner and
            (col_end, row_end) the bottom-right corner.

    Returns:
        shapely.geometry.Polygon: The crop's bounding box in the same CRS as
        the original footprint.
    """
    # Affine transform mapping (col, row) pixel coordinates to the footprint's
    # CRS. Renamed from 'transform' to avoid shadowing shapely.ops.transform,
    # which is imported at module level.
    affine = from_bounds(*footprint.bounds, width, height)

    # Convert pixel corners to spatial coordinates.
    # NOTE(review): with a north-up transform, row 0 maps to the *largest* y,
    # so the 'min_y'/'max_y' names are nominal; shapely's box still produces
    # the intended axis-aligned rectangle either way.
    min_x, min_y = affine * (crop_bbox[0], crop_bbox[1])  # (col_start, row_start)
    max_x, max_y = affine * (crop_bbox[2], crop_bbox[3])  # (col_end, row_end)

    return box(min_x, min_y, max_x, max_y)
40
+
41
def fragment_unfold(image, fragment_size, overlap):
    """Extract overlapping patches ("fragments") from an image.

    Args:
        image (torch.Tensor or np.ndarray): Input image. A numpy array is
            expected channel-last (H, W, C); a tensor is expected channel-first.
        fragment_size (int or list): Patch size; an int yields square patches,
            a pair of ints yields rectangular ones.
        overlap (int or list): Overlap between adjacent patches, as an int or
            a pair of ints.

    Returns:
        torch.Tensor: Unfolded patches of shape (num_patches, C, h, w), in
        row-major sliding order.
    """
    # Numpy input arrives channel-last; move channels first.
    if not torch.is_tensor(image):
        image = torch.from_numpy(image).permute(2, 0, 1)
    # Ensure a leading batch dimension for F.unfold.
    if len(image.shape) < 4:
        image = image.unsqueeze(0)

    batch, channels = image.shape[0], image.shape[1]

    # Promote scalar arguments to (height, width) pairs.
    if isinstance(fragment_size, int):
        fragment_size = [fragment_size] * 2
    if isinstance(overlap, int):
        overlap = [overlap] * 2

    # The stride between patch origins is size minus overlap.
    stride = [size - lap for size, lap in zip(fragment_size, overlap)]

    patches = torch.nn.functional.unfold(
        image, fragment_size, dilation=1, padding=0, stride=stride
    )

    # (B, C*h*w, N) -> (N, C, h, w) for the first (only) batch element.
    patches = patches.view(batch, channels, *fragment_size, -1)
    return patches.permute(0, 4, 1, 2, 3)[0]
78
+
79
def fragment_fn(img,
                fragment_size,
                target_overlap,
                border_shift=True, # determines whether the outer border is shifted to ensure full coverage
                return_indices=False,
                verbose=False
                ):
    """
    Fragment a square image into smaller patches with a specified fragment size and overlap.

    Handles several scenarios based on image vs. fragment size: rejects images
    smaller than the fragment (Scenario 1), returns the whole image as a single
    fragment when it matches the fragment size exactly (Scenario 2), and
    otherwise unfolds overlapping fragments, optionally shifting the last
    row/column of fragments so the image border is fully covered (Scenarios 3/4).

    Args:
        img (np.ndarray or torch.Tensor): Input image, channel-last (height, width, channels).
        fragment_size (int): Side length of the (square) fragments.
        target_overlap (float): Target fractional overlap between adjacent fragments.
        border_shift (bool): Shift the outermost fragments to the image border for full coverage. Default True.
        return_indices (bool): Also return the (pixel) offsets for each fragment. Default False.
        verbose (bool): Print the achieved overlap. Default False.

    Returns:
        torch.Tensor or tuple:
            - fragments of shape (n_rows, n_cols, c, hf, wf) if `return_indices` is False;
            - (fragments, offsets) where offsets has shape (n_rows, n_cols, 2), otherwise.
    """

    h,w,c=img.shape

    assert h==w # SQUARE IMAGES SUPPORT ONLY

    # Square fragments; overlap is expressed in pixels (fraction * size).
    hf, wf = fragment_size, fragment_size
    ho, wo = target_overlap*hf, target_overlap*wf

    assert h >= hf and w >= wf # reject Scenario 1

    # Scenario 2: the image is exactly one fragment.
    if h == hf or w == wf:
        if not torch.is_tensor(img):
            img=torch.from_numpy(img).permute(2,0,1)
        # NOTE(review): if img was already a (h, w, c) tensor, .view(1,1,c,h,w)
        # reinterprets memory instead of permuting axes — likely a bug for
        # tensor inputs; confirm callers never hit this branch with tensors.
        return img.view(1,1,c,h,w)

    # Scenario 3 & 4

    # determine number of segments between the centers of outermost fragments
    h_n = max(1, int(np.round((h-hf)/(hf-ho))))
    w_n = max(1, int(np.round((w-wf)/(wf-wo))))

    # adjust practical overlap (divide the distance between the centers of outermost fragments by the true number of segments)
    aho = int(np.ceil(hf-(h-hf)/(h_n)))
    awo = int(np.ceil(wf-(w-wf)/(w_n)))

    # compute fragments (might not exactly fill the outermost border)
    topleft = fragment_unfold(img.permute(2,0,1),fragment_size=(hf,wf), overlap=(aho,awo)).view(1+h_n, 1+w_n, c, hf, wf)

    full = topleft

    if border_shift:

        # Only needed when the regular grid does not reach the image border.
        if h > hf+h_n*(hf-aho) or w > wf+w_n*(wf-awo):
            #print('Outers...')
            # Fragments flush with the bottom edge / right edge of the image.
            bottomleft = fragment_unfold(img[-hf:,:,:],fragment_size=(hf,wf), overlap=(aho,awo)).view(1,1+w_n,c,hf,wf)
            topright = fragment_unfold(img[:,-wf:,:],fragment_size=(hf,wf), overlap=(aho,awo)).view(1+h_n,1,c,hf,wf)

            # Shift last row and col to the border of the original
            full[:,-1,None] = topright
            full[-1] = bottomleft

    if verbose:
        print('Target Overlap: {} pixels. Feasible Overlap: {} pixels.'.format(ho,aho))

    if not return_indices:
        return full
    else:
        # Offsets initialized to -1 (sentinel), then filled per grid position.
        offset=-1*torch.ones(*full.shape[:2],2)
        for ridx in range(full.shape[0]):
            for cidx in range(full.shape[1]):
                # NOTE(review): channel 0 grows with the row index and channel 1
                # with the column index, while the caller unpacks them as
                # (x_offset, y_offset); likewise the border_shift fix below
                # mixes h/w with column/row. This is only consistent because
                # square images (h == w, hf == wf, aho == awo) are asserted
                # above — confirm before lifting the square-image restriction.
                offset[ridx,cidx,1] = cidx * (hf-aho)
                offset[ridx,cidx,0] = ridx * (wf-awo)

                if border_shift:
                    # Outermost fragments were shifted flush with the border.
                    offset[ridx,-1,1] = h-hf
                    offset[-1,cidx,0] = w-wf

        return full,offset
MajorTOM/embedder/models/DINOv2_S2RGB.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoImageProcessor, AutoModel
3
+
4
class DINOv2_S2RGB_Embedder(torch.nn.Module):
    """DINOv2-based embedder for Sentinel-2 RGB (true-color) data.

    Raw Sentinel-2 reflectance (bands B04/B03/B02) is mapped to true-color
    values in [0, 1] (divide by 10,000, multiply by 2.5, clip), preprocessed
    by the DINOv2 image processor, and passed through the 'facebook/dinov2-base'
    transformer. The token embeddings are averaged over the sequence dimension
    to yield one fixed-size vector per image.

    Attributes:
        processor (AutoImageProcessor): DINOv2 preprocessing pipeline.
        model (AutoModel): Pre-trained DINOv2 backbone.
        bands (list): Sentinel-2 bands used as RGB input (B04, B03, B02).
        size (tuple): Expected input size (height, width), taken from the
            processor's crop settings.
    """

    def __init__(self):
        """Load the pre-trained DINOv2 processor/model and record the input spec."""
        super().__init__()

        # Pre-trained DINOv2 components from Hugging Face.
        self.processor = AutoImageProcessor.from_pretrained('facebook/dinov2-base')
        self.model = AutoModel.from_pretrained('facebook/dinov2-base')

        # Sentinel-2 RGB bands: B04 (red), B03 (green), B02 (blue).
        self.bands = ['B04', 'B03', 'B02']

        # Input size comes from the processor's crop configuration.
        crop = self.processor.crop_size
        self.size = crop['height'], crop['width']

    def normalize(self, input):
        """Map raw Sentinel-2 reflectance to true-color values in [0, 1].

        Division by 10,000 converts digital numbers to reflectance; the 2.5
        gain brightens to a conventional true-color rendering before clipping.

        Args:
            input (torch.Tensor): Raw Sentinel-2 image tensor.

        Returns:
            torch.Tensor: Normalized true-color image in [0, 1].
        """
        reflectance = input / 1e4
        return (2.5 * reflectance).clip(0, 1)

    def forward(self, input):
        """Compute a fixed-size embedding for the input image.

        Normalizes the image, runs the DINOv2 processor and backbone, and
        averages the last hidden states over the sequence dimension.

        Args:
            input (torch.Tensor): Sentinel-2 image tensor [C, H, W], C=3 (RGB).

        Returns:
            torch.Tensor: Embedding of shape [embedding_dim], on CPU.
        """
        batch = self.processor(self.normalize(input), return_tensors="pt")
        pixel_values = batch['pixel_values'].to(self.model.device)
        hidden = self.model(pixel_values).last_hidden_state
        return hidden.mean(dim=1).cpu()
MajorTOM/embedder/models/SSL4EO_S1RTC.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torchgeo.models import ResNet50_Weights
3
+ import timm
4
+ import numpy as np
5
+
6
class SSL4EO_S1RTC_Embedder(torch.nn.Module):
    """SSL4EO embedder for Sentinel-1 RTC (VV/VH) data.

    Wraps a ResNet50 backbone pre-trained on Sentinel-1 SAR data via SSL4EO
    (Self-Supervised Learning for Earth Observation); the classification head
    is removed so the forward pass yields feature embeddings directly.

    Project Code:
        https://github.com/zhu-xlab/SSL4EO-S12

    Publication:
        https://arxiv.org/abs/2211.07044
    """

    def __init__(self, s1_mean=(-12.54847273, -20.19237134), s1_std=(5.25697717, 5.91150917)):
        """Set up the normalization statistics and load the pre-trained model.

        Args:
            s1_mean (sequence, optional): Per-band (VV, VH) mean in dB.
                Defaults to SSL4EO's values. (Tuples instead of the original
                mutable list defaults; converted to tensors below.)
            s1_std (sequence, optional): Per-band (VV, VH) std in dB.
                Defaults to SSL4EO's values.

        Attributes:
            s1_mean (torch.FloatTensor): Mean values for normalization.
            s1_std (torch.FloatTensor): Std values for normalization.
            model (torch.nn.Module): ResNet50 with pre-trained SSL4EO weights.
            bands (list): Input band names ('vv', 'vh'), in order.
            size (tuple): Expected input size, 224x224 pixels.
        """
        super().__init__()

        self.s1_mean = torch.FloatTensor(s1_mean)
        self.s1_std = torch.FloatTensor(s1_std)

        self.model = self.init_model()
        self.bands = ['vv', 'vh']
        self.size = 224, 224

    def init_model(self):
        """Create a ResNet50 with SSL4EO Sentinel-1 MoCo weights.

        Loads `ResNet50_Weights.SENTINEL1_ALL_MOCO` and replaces the fully
        connected head with Identity so the model outputs the pooled feature
        vector of the last convolutional stage.

        Returns:
            torch.nn.Module: The initialized backbone.
        """
        weights = ResNet50_Weights.SENTINEL1_ALL_MOCO
        model = timm.create_model('resnet50', in_chans=weights.meta['in_chans'])
        model.load_state_dict(weights.get_state_dict(progress=True), strict=False)
        model.fc = torch.nn.Identity()
        return model

    def normalize(self, img, scale=1.0):
        """Normalize dB-scale SAR data to [0, scale] using mean ± 2·std.

        Each band is mapped linearly so that (mean - 2·std) -> 0 and
        (mean + 2·std) -> scale, then clipped to that range.

        Args:
            img (torch.Tensor): dB-scale image, channel dimension matching
                s1_mean/s1_std (works for (C, H, W) and (B, C, H, W) inputs
                via broadcasting).
            scale (float, optional): Upper bound of the output range. Default 1.0.

        Returns:
            torch.Tensor: Normalized, clipped float tensor.
        """
        min_value = (self.s1_mean - 2 * self.s1_std).to(img.device)
        max_value = (self.s1_mean + 2 * self.s1_std).to(img.device)
        img = (img - min_value[:, None, None]) / (max_value - min_value)[:, None, None] * scale
        return img.clip(0, scale).float()

    def preprocess(self, input):
        """Convert linear SAR backscatter to dB scale and normalize.

        Args:
            input (torch.Tensor): Linear-scale Sentinel-1 image (VV/VH).

        Returns:
            torch.Tensor: Normalized dB-scale image.
        """
        # BUG FIX: Tensor.log10() takes no arguments — the original
        # `input.log10(input.clip(min=1e-10))` raised a TypeError at runtime.
        dB_input = 10 * torch.log10(input.clip(min=1e-10))  # clip prevents log(0)
        return self.normalize(dB_input)

    def forward(self, input):
        """Preprocess the input and return the backbone embedding.

        Args:
            input (torch.Tensor): Linear-scale Sentinel-1 image (e.g. [C, H, W]).

        Returns:
            torch.Tensor: Output embedding from the model.
        """
        return self.model(self.preprocess(input))
MajorTOM/embedder/models/SSL4EO_S2L1C.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torchgeo.models import ResNet50_Weights
3
+ import timm
4
+
5
class SSL4EO_S2L1C_Embedder(torch.nn.Module):
    """
    Sentinel-2 L1C embedder built on SSL4EO pre-trained weights.

    Wraps a ResNet50 backbone carrying torchgeo's
    `ResNet50_Weights.SENTINEL2_ALL_DINO` weights (self-supervised DINO
    pre-training on Sentinel-2 data) and exposes it as a feature extractor.

    Project Code:
        https://github.com/zhu-xlab/SSL4EO-S12

    Publication:
        https://arxiv.org/abs/2211.07044
    """

    def __init__(self):
        """
        Load the pre-trained backbone and record the expected inputs.

        Attributes:
            model (torch.nn.Module): ResNet50 with Sentinel-2 pre-trained weights
                and its classification head removed.
            bands (list): Sentinel-2 L1C band names expected as input channels.
            size (tuple): Input spatial resolution expected by the model (224x224).
        """
        super().__init__()

        # Pre-trained SSL4EO ResNet50 backbone.
        self.model = self.init_model()

        # All 13 Sentinel-2 L1C spectral bands, in sensor order.
        self.bands = [
            'B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07',
            'B08', 'B8A', 'B09', 'B10', 'B11', 'B12'
        ]

        # Input resolution (H, W) expected by the backbone.
        self.size = (224, 224)

    def init_model(self):
        """
        Build the ResNet50 backbone and load the Sentinel-2 weights.

        The network is created with `timm` using the channel count declared by
        the torchgeo weight metadata, the pre-trained state dict is loaded
        (non-strict, since the classifier weights are not needed), and the
        fully connected head is replaced with an identity so that the forward
        pass yields pooled features rather than class logits.

        Returns:
            torch.nn.Module: The initialized ResNet50 feature extractor.
        """
        weights = ResNet50_Weights.SENTINEL2_ALL_DINO
        backbone = timm.create_model('resnet50', in_chans=weights.meta['in_chans'])
        backbone.load_state_dict(weights.get_state_dict(progress=True), strict=False)
        # Drop the classification head to obtain embeddings directly.
        backbone.fc = torch.nn.Identity()
        return backbone

    def preprocess(self, input):
        """
        Scale raw Sentinel-2 reflectance values into the model's input range.

        Raw reflectances (roughly 0..10,000) are divided by 10,000 so the
        values land in approximately [0, 1].

        Args:
            input (torch.Tensor): Sentinel-2 reflectance image (e.g., shape [C, H, W]).

        Returns:
            torch.Tensor: Input divided by 10,000.
        """
        return input / 1e4

    def forward(self, input):
        """
        Embed a Sentinel-2 L1C image.

        Args:
            input (torch.Tensor): Raw Sentinel-2 image (e.g., shape [C, H, W]).

        Returns:
            torch.Tensor: The embedding produced by the backbone.
        """
        return self.model(self.preprocess(input))
MajorTOM/embedder/models/SigLIP_S2RGB.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from open_clip import create_model_from_pretrained, get_tokenizer
2
+ import torch
3
+
4
class SigLIP_S2RGB_Embedder(torch.nn.Module):
    """
    Embedding wrapper around SigLIP for Sentinel-2 RGB data.

    Sentinel-2 reflectances are first mapped to a True-Colour image
    (divide by 10,000, multiply by 2.5, clip to [0, 1]) and then passed
    through the SigLIP image tower to produce a feature vector.

    Preprocessing:
        - Sentinel-2 bands are divided by 10,000 to scale the reflectance values.
        - The values are multiplied by 2.5 and clipped into [0, 1].
        - The model's own final normalization transform is applied.

    Model:
        - Takes an RGB input of 384x384 pixels and produces an embedding vector.
    """

    def __init__(self):
        super().__init__()

        # Pre-trained SigLIP (ViT-SO400M-14, 384 px) and its transform pipeline.
        self.model, self.preprocess = create_model_from_pretrained('hf-hub:timm/ViT-SO400M-14-SigLIP-384')
        # Sentinel-2 RGB bands (B04 - Red, B03 - Green, B02 - Blue)
        self.bands = ['B04', 'B03', 'B02']
        # Input resolution taken from the first transform of the pipeline.
        self.size = self.preprocess.transforms[0].size

    def normalize(self, input):
        """
        Map Sentinel-2 reflectances to a True-Colour image in [0, 1].

        Divides by 10,000 to scale raw reflectance values, multiplies by 2.5
        to brighten into the True-Colour range, and clips to [0, 1].

        Args:
            input (torch.Tensor or np.ndarray): Sentinel-2 reflectance values.

        Returns:
            torch.Tensor: Normalized True-Colour image, clipped to [0, 1].
        """
        scaled = input / 1e4
        return (2.5 * scaled).clip(0, 1)

    def forward(self, input):
        """
        Embed a Sentinel-2 RGB image with SigLIP.

        Args:
            input (torch.Tensor): A Sentinel-2 image of shape (C, H, W) with
                C=3 (RGB), H=384, W=384.

        Returns:
            torch.Tensor: The image embedding produced by the model.
        """
        true_colour = self.normalize(input)

        # Only the pipeline's final (normalization) transform is applied;
        # resizing is expected to have happened upstream.
        model_input = self.preprocess.transforms[-1](true_colour)

        return self.model.encode_image(model_input)
MajorTOM/embedder/models/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .SigLIP_S2RGB import *
2
+ from .DINOv2_S2RGB import *
3
+ from .SSL4EO_S2L1C import *
4
+ from .SSL4EO_S1RTC import *
MajorTOM/extras/coverage-example.png ADDED

Git LFS Details

  • SHA256: a2ed4c9e1b6516b07b803cdced733213d3db3692665c119814fb495089231627
  • Pointer size: 132 Bytes
  • Size of remote file: 2.97 MB
MajorTOM/extras/coverage_vis.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from mpl_toolkits.basemap import Basemap
5
+ import PIL
6
+
7
def get_mask(df):
    """
    Build the binary coverage mask of a Major TOM dataframe.

    Produces a 2004x4008 uint8 image where cells present in `df` (and not
    flagged as nodata) are 255 and everything else is 0.

    Args:
        df: Major TOM metadata dataframe with `grid_row_u`, `grid_col_r`
            and `nodata` columns.

    Returns:
        PIL.Image: the coverage mask.
    """
    mask = np.zeros((2004, 4008), dtype=np.uint8)
    row_offset = -1002
    col_offset = -2004

    # keep only cells whose nodata fraction is at most 0.5
    valid = ~(df['nodata'].values > 0.5)

    # grid rows count upwards; flip vertically into image coordinates
    ys = mask.shape[0] - (np.array(df['grid_row_u']) - row_offset) - 1
    xs = np.array(df['grid_col_r']) - col_offset

    mask[ys[valid], xs[valid]] = 255

    return PIL.Image.fromarray(mask)
27
+
28
def fig2img(fig):
    """Render a Matplotlib figure into an in-memory PIL Image."""
    import io
    buffer = io.BytesIO()
    fig.savefig(buffer)
    buffer.seek(0)
    # The buffer is intentionally left open: PIL reads from it lazily.
    return PIL.Image.open(buffer)
36
+
37
def light_basemap():
    """
    Render a bright-coloured sinusoidal world basemap.

    Continents are filled light green, water light blue, with grey country
    borders and coastlines. All axes decorations and margins are removed so
    the figure contains only the map itself.

    Returns:
        PIL.Image: the rendered basemap.
    """

    with plt.ioff():  # suppress interactive display while rendering
        fig, ax = plt.subplots(figsize=(48,24), dpi=167)

        m = Basemap(projection='sinu', lat_0=0, lon_0=0, resolution='l', ax=ax)
        m.fillcontinents(color="#9eba9b", lake_color='#CCDDFF')
        m.drawmapboundary(fill_color="#CCDDFF")
        m.drawcountries(color="#666666", linewidth=1)
        m.drawcoastlines(color="#666666", linewidth=1)

        # strip axes, padding and margins so only the map area is drawn
        plt.gca().set_axis_off()
        plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0,
                    hspace = 0, wspace = 0)
        plt.margins(0,0)

    return fig2img(fig)
57
+
58
def dark_basemap():
    """
    Render a dark sinusoidal world basemap.

    Land and water are drawn in near-black tones with black borders and
    coastlines; used as the background layer behind the bright coverage
    overlay. All axes decorations and margins are removed.

    Returns:
        PIL.Image: the rendered basemap.
    """

    with plt.ioff():  # suppress interactive display while rendering
        fig, ax = plt.subplots(figsize=(48,24), dpi=167)

        m = Basemap(projection='sinu', lat_0=0, lon_0=0, resolution='l', ax=ax)
        m.fillcontinents(color="#242424", lake_color='#242424')
        m.drawmapboundary(fill_color="#242424")
        m.drawcountries(color="#000000", linewidth=1)
        m.drawcoastlines(color="#000000", linewidth=1)

        # strip axes, padding and margins so only the map area is drawn
        plt.gca().set_axis_off()
        plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0,
                    hspace = 0, wspace = 0)
        plt.margins(0,0)

    return fig2img(fig)
78
+
79
def get_coveragemap(input, input2=None):
    """
    Creates a complete coloured Major TOM coverage figure in the same style as in the official documentation

    Optionally, input2 can be provided and then, the map plots a map with extra colours indicating cells available only in input (green) or only input2 (blue); cells present in both appear red.
    """

    if input2 is None:
        return single_coveragemap(input)
    else:
        cmap1 = single_coveragemap(input)
        cmap2 = single_coveragemap(input2)

        # arrays for mixing (drop the alpha channel)
        inp1_arr = np.array(cmap1)[...,:3]
        inp2_arr = np.array(cmap2)[...,:3]

        # Pixels whose channel sums match in both maps are treated as common
        # coverage. NOTE(review): two different colours with equal channel
        # sums would also match — confirm this is acceptable here.
        common_arr = inp1_arr*(inp1_arr.sum(-1) == inp2_arr.sum(-1))[:,:,None]
        common_arr[:,:,(1,2)] = 0  # keep only the red channel for common cells
        inp1_arr[:,:,(0,2)] = 0 # Green - indicates presence in input only
        inp2_arr[:,:,(0,1)] = 0 # Blue - indicates presence in input2 only

        return PIL.Image.fromarray(((common_arr + inp1_arr + inp2_arr)).astype(np.uint8))
102
+
103
+
104
def single_coveragemap(input):
    """
    Creates a complete coloured Major TOM coverage figure in the same style as in the official documentation

    Args:
        input: either a Major TOM metadata DataFrame (a mask is computed via
            `get_mask`) or an already-computed PIL mask image.

    Returns:
        PIL.Image: dark basemap with covered cells shown in bright colours.
    """

    # compute mask if df is provided
    if isinstance(input, pd.DataFrame):
        mask = get_mask(input)
    else:
        mask = input

    basemap = light_basemap()
    basemap_d = dark_basemap()

    # Pixels with a saturated red channel in the light basemap are taken to
    # lie outside the globe outline — presumably the white page background.
    outside_earth = np.array(basemap.convert('RGBA'))[:, :, 0] == 255
    outside_earth = PIL.Image.fromarray(outside_earth)

    # nearest-neighbour keeps the mask binary when upscaling to map size
    mask = mask.resize(basemap.size, PIL.Image.NEAREST)

    # show the bright basemap only where coverage exists
    basemap.putalpha(mask)

    # Mask outside of earth
    basemap.paste(outside_earth, (0,0), outside_earth)

    # composite the bright coverage layer over the dark background
    basemap_d.paste(basemap, (0,0), basemap)

    return basemap_d
131
+
132
if __name__ == '__main__':
    # Example: single-dataset coverage map from the hosted metadata parquet.
    DATASET_NAME = 'Major-TOM/Core-S2L2A'
    meta_path = 'https://huggingface.co/datasets/{}/resolve/main/metadata.parquet'.format(DATASET_NAME)
    df = pd.read_parquet(meta_path)

    # This is how you make a coverage figure!
    coverage_img = get_coveragemap(df)

    coverage_img.save('coverage-example.png', format='PNG')

    # and this is how you can create an overlap map for 2 datasets!
    DATASET_NAME = 'Major-TOM/Core-DEM'
    meta_path = 'https://huggingface.co/datasets/{}/resolve/main/metadata.parquet'.format(DATASET_NAME)
    dem_df = pd.read_parquet(meta_path)

    coverage_img = get_coveragemap(df,dem_df)

    coverage_img.save('overlap-coverage-example.png', format='PNG')
MajorTOM/extras/extract-sample-from-raw-S2.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
MajorTOM/extras/thumbnail_dem.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NOTE: Major TOM standard does not require any specific type of thumbnail to be computed.
3
+
4
+ Instead these are shared as optional help since this is how the Core dataset thumbnails have been computed.
5
+ """
6
+
7
+ from rasterio.io import MemoryFile
8
+ from PIL import Image
9
+ import numpy as np
10
+ import os
11
+ from pathlib import Path
12
+ import rasterio as rio
13
+ from matplotlib.colors import LightSource
14
+
15
+ def get_grayscale(x):
16
+ """
17
+ Normalized grayscale visualisation
18
+ """
19
+
20
+ # normalize
21
+ x_n = x-x.min()
22
+ x_n = x_n/x_n.max()
23
+
24
+ return np.uint8(x_n*255)
25
+
26
+ def get_hillshade(x, azdeg=315, altdeg=45,ve=1):
27
+ """
28
+ Hillshade visualisation for DEM
29
+ """
30
+ ls = LightSource(azdeg=azdeg, altdeg=altdeg)
31
+
32
+ return np.uint8(255*ls.hillshade(x, vert_exag=ve))
33
+
34
+ def dem_thumbnail(dem, dem_NODATA = -32768.0, hillshade=True):
35
+ """
36
+ Takes vv and vh numpy arrays along with the corresponding NODATA values (default is -32768.0)
37
+
38
+ Returns a numpy array with the thumbnail
39
+ """
40
+ if hillshade:
41
+ return get_hillshade(dem)
42
+ else:
43
+ return get_grayscale(dem)
44
+
45
+
46
+ def dem_thumbnail_from_datarow(datarow):
47
+ """
48
+ Takes a datarow directly from one of the data parquet files
49
+
50
+ Returns a PIL Image
51
+ """
52
+
53
+ with MemoryFile(datarow['DEM'][0].as_py()) as mem_f:
54
+ with mem_f.open(driver='GTiff') as f:
55
+ dem=f.read().squeeze()
56
+ dem_NODATA = f.nodata
57
+
58
+ img = dem_thumbnail(dem, dem_NODATA)
59
+
60
+ return Image.fromarray(img,'L')
61
+
62
+ if __name__ == '__main__':
63
+ from fsspec.parquet import open_parquet_file
64
+ import pyarrow.parquet as pq
65
+
66
+ print('[example run] reading file from HuggingFace...')
67
+ url = "https://huggingface.co/datasets/Major-TOM/Core-DEM/resolve/main/images/part_01001.parquet"
68
+ with open_parquet_file(url) as f:
69
+ with pq.ParquetFile(f) as pf:
70
+ first_row_group = pf.read_row_group(1)
71
+
72
+ print('[example run] computing the thumbnail...')
73
+ thumbnail = dem_thumbnail_from_datarow(first_row_group)
74
+
75
+ thumbnail_fname = 'example_thumbnail.png'
76
+ thumbnail.save(thumbnail_fname, format = 'PNG')
77
+ print('[example run] saved as "{}"'.format(thumbnail_fname))
MajorTOM/extras/thumbnail_s1rtc.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NOTE: Major TOM standard does not require any specific type of thumbnail to be computed.
3
+
4
+ Instead these are shared as optional help since this is how the Core dataset thumbnails have been computed.
5
+ """
6
+
7
+ from rasterio.io import MemoryFile
8
+ from PIL import Image
9
+ import numpy as np
10
+
11
def s1rtc_thumbnail(vv, vh, vv_NODATA = -32768.0, vh_NODATA = -32768.0):
    """
    Build a false-colour RGB thumbnail from VV and VH backscatter arrays.

    Channels: R = scaled VV (dB), G = scaled VV+VH sum, B = scaled VH (dB).
    Nodata pixels are rendered as 0. The input arrays are left unmodified.

    Args:
        vv (np.ndarray): VV polarization, linear backscatter scale.
        vh (np.ndarray): VH polarization, linear backscatter scale.
        vv_NODATA (float): VV nodata sentinel value (default -32768.0).
        vh_NODATA (float): VH nodata sentinel value (default -32768.0).

    Returns:
        np.ndarray: (H, W, 3) uint8 thumbnail.
    """

    # valid data masks
    vv_mask = vv != vv_NODATA
    vh_mask = vh != vh_NODATA

    # work on copies so the caller's arrays are not mutated when clamping
    vv = vv.copy()
    vh = vh.copy()

    # remove invalid (negative) values before the log op
    vv[vv<0] = vv[vv>=0].min()
    vh[vh<0] = vh[vh>=0].min()

    # apply log op (linear -> dB)
    vv_dB = 10*np.log10(vv)
    vh_dB = 10*np.log10(vh)

    # scale the valid-data range to 0-255
    vv_dB = (vv_dB - vv_dB[vv_mask].min()) / (vv_dB[vv_mask].max() - vv_dB[vv_mask].min()) * 255
    vh_dB = (vh_dB - vh_dB[vh_mask].min()) / (vh_dB[vh_mask].max() - vh_dB[vh_mask].min()) * 255

    # represent nodata as 0
    vv_dB[vv_mask==0] = 0
    vh_dB[vh_mask==0] = 0

    # false colour composite
    return np.stack([vv_dB,
                     255*(vv_dB+vh_dB)/np.max(vv_dB+vh_dB),
                     vh_dB
                    ],-1).astype(np.uint8)
43
+
44
def s1rtc_thumbnail_from_datarow(datarow):
    """
    Takes a datarow directly from one of the data parquet files

    The 'vv' and 'vh' columns hold in-memory GeoTIFF blobs (pyarrow values);
    each is decoded with rasterio before being passed to `s1rtc_thumbnail`.

    Returns a PIL Image
    """

    # decode the VV GeoTIFF bytes entirely in memory
    with MemoryFile(datarow['vv'][0].as_py()) as mem_f:
        with mem_f.open(driver='GTiff') as f:
            vv=f.read().squeeze()
            vv_NODATA = f.nodata

    # decode the VH GeoTIFF bytes entirely in memory
    with MemoryFile(datarow['vh'][0].as_py()) as mem_f:
        with mem_f.open(driver='GTiff') as f:
            vh=f.read().squeeze()
            vh_NODATA = f.nodata

    img = s1rtc_thumbnail(vv, vh, vv_NODATA=vv_NODATA, vh_NODATA=vh_NODATA)

    return Image.fromarray(img)
64
+
65
if __name__ == '__main__':
    from fsspec.parquet import open_parquet_file
    import pyarrow.parquet as pq

    # Example: stream a single row group of a hosted Core-S1RTC parquet file
    # and render its thumbnail (avoids downloading the whole file).
    print('[example run] reading file from HuggingFace...')
    url = "https://huggingface.co/datasets/Major-TOM/Core-S1RTC/resolve/main/images/part_00001.parquet"
    with open_parquet_file(url) as f:
        with pq.ParquetFile(f) as pf:
            first_row_group = pf.read_row_group(1)

    print('[example run] computing the thumbnail...')
    thumbnail = s1rtc_thumbnail_from_datarow(first_row_group)

    thumbnail_fname = 'example_thumbnail.png'
    thumbnail.save(thumbnail_fname, format = 'PNG')
    print('[example run] saved as "{}"'.format(thumbnail_fname))
MajorTOM/extras/thumbnail_s2.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NOTE: Major TOM standard does not require any specific type of thumbnail to be computed.
3
+
4
+ Instead these are shared as optional help since this is how the Core dataset thumbnails have been computed.
5
+ """
6
+
7
+ from rasterio.io import MemoryFile
8
+ from PIL import Image
9
+ import numpy as np
10
+
11
+ def s2l2a_thumbnail(B04, B03, B02, gain=1.3, gamma=0.6):
12
+ """
13
+ Takes B04, B03, B02 numpy arrays along with the corresponding NODATA values (default is -32768.0)
14
+
15
+ Returns a numpy array with the thumbnail
16
+ """
17
+
18
+ # concatenate
19
+ thumb = np.stack([B04, B03, B02], -1)
20
+
21
+ # apply gain & gamma
22
+ thumb = gain*((thumb/10_000)**gamma)
23
+
24
+ return (thumb.clip(0,1)*255).astype(np.uint8)
25
+
26
+ def s2l2a_thumbnail_from_datarow(datarow):
27
+ """
28
+ Takes a datarow directly from one of the data parquet files
29
+
30
+ Returns a PIL Image
31
+ """
32
+
33
+ # red
34
+ with MemoryFile(datarow['B04'][0].as_py()) as mem_f:
35
+ with mem_f.open(driver='GTiff') as f:
36
+ B04=f.read().squeeze()
37
+ B04_NODATA = f.nodata
38
+
39
+ # green
40
+ with MemoryFile(datarow['B03'][0].as_py()) as mem_f:
41
+ with mem_f.open(driver='GTiff') as f:
42
+ B03=f.read().squeeze()
43
+ B03_NODATA = f.nodata
44
+
45
+ # blue
46
+ with MemoryFile(datarow['B02'][0].as_py()) as mem_f:
47
+ with mem_f.open(driver='GTiff') as f:
48
+ B02=f.read().squeeze()
49
+ B02_NODATA = f.nodata
50
+
51
+ img = s2l2a_thumbnail(B04,B03,B02)
52
+
53
+ return Image.fromarray(img)
54
+
55
+ if __name__ == '__main__':
56
+ from fsspec.parquet import open_parquet_file
57
+ import pyarrow.parquet as pq
58
+
59
+ print('[example run] reading file from HuggingFace...')
60
+ url = "https://huggingface.co/datasets/Major-TOM/Core-S2L2A/resolve/main/images/part_01000.parquet"
61
+ with open_parquet_file(url, columns = ["B04", "B03", "B02"]) as f:
62
+ with pq.ParquetFile(f) as pf:
63
+ first_row_group = pf.read_row_group(1, columns = ["B04", "B03", "B02"])
64
+
65
+ print('[example run] computing the thumbnail...')
66
+ thumbnail = s2l2a_thumbnail_from_datarow(first_row_group)
67
+
68
+ thumbnail.save('example_thumbnail.png', format = 'PNG')
MajorTOM/grid.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import math
3
+ import pandas as pd
4
+ import geopandas as gpd
5
+ from shapely.geometry import LineString, Polygon
6
+ from tqdm import tqdm
7
+ import re
8
+
9
+
10
+
11
class Grid():
    """
    Major TOM global grid of approximately equidistant points.

    Rows are latitude bands labelled `0U..NU` (equator and northwards) and
    `1D..ND` (southwards); columns are labelled `0R..NR` (Greenwich meridian
    and eastwards) and `1L..NL` (westwards). Each grid point is the
    bottom-left corner of its cell.
    """

    # Earth's equatorial radius (WGS84).
    RADIUS_EQUATOR = 6378.137 # km

    def __init__(self,dist,latitude_range=(-85,85),longitude_range=(-180,180),utm_definition='bottomleft'):
        """
        Args:
            dist (float): Target spacing between neighbouring points in km.
            latitude_range (tuple): Inclusive (min, max) latitude bounds.
            longitude_range (tuple): Inclusive (min, max) longitude bounds.
            utm_definition (str): 'bottomleft' or 'center' — which point of a
                cell is used to assign its UTM zone / EPSG code.
        """
        self.dist = dist
        self.latitude_range = latitude_range
        self.longitude_range = longitude_range
        self.utm_definition = utm_definition
        self.rows,self.lats = self.get_rows()
        self.points, self.points_by_row = self.get_points()

    def get_rows(self):
        """
        Compute the row labels and latitudes of all grid rows.

        Returns:
            (np.ndarray, np.ndarray): row names (e.g. '3U', '2D') and their
            latitudes, ordered south to north and bounded to `latitude_range`.
        """
        # Define set of latitudes to use, based on the grid distance
        arc_pole_to_pole = math.pi * self.RADIUS_EQUATOR
        num_divisions_in_hemisphere = math.ceil(arc_pole_to_pole / self.dist)

        latitudes = np.linspace(-90, 90, num_divisions_in_hemisphere+1)[:-1]
        latitudes = np.mod(latitudes, 180) - 90

        # order should be from south to north
        latitudes = np.sort(latitudes)

        # index of the first non-negative latitude (the '0U' row)
        zeroth_row = np.searchsorted(latitudes,0)

        # From 0U-NU and 1D-ND
        rows = [None] * len(latitudes)
        rows[zeroth_row:] = [f'{i}U' for i in range(len(latitudes)-zeroth_row)]
        rows[:zeroth_row] = [f'{abs(i-zeroth_row)}D' for i in range(zeroth_row)]

        # bound to range
        idxs = (latitudes>=self.latitude_range[0]) * (latitudes<=self.latitude_range[1])
        rows,latitudes = np.array(rows), np.array(latitudes)
        rows,latitudes = rows[idxs],latitudes[idxs]

        return rows,latitudes

    def get_circumference_at_latitude(self,lat):
        """Circumference (km) of the Earth's cross-section at latitude `lat`."""

        # Circumference of the cross-section of a sphere at a given latitude

        radius_at_lat = self.RADIUS_EQUATOR * math.cos(lat * math.pi / 180)
        circumference = 2 * math.pi * radius_at_lat

        return circumference

    def subdivide_circumference(self,lat,return_cols=False):
        """
        Subdivide the parallel at `lat` into arcs as close as possible to
        `dist` km, returning the resulting longitudes (and, optionally, the
        matching column labels such as '2R' / '1L').
        """
        # Provide a list of longitudes that subdivide the circumference of the earth at a given latitude
        # into equal parts as close as possible to dist

        circumference = self.get_circumference_at_latitude(lat)
        num_divisions = math.ceil(circumference / self.dist)
        longitudes = np.linspace(-180,180, num_divisions+1)[:-1]
        longitudes = np.mod(longitudes, 360) - 180
        longitudes = np.sort(longitudes)


        if return_cols:
            # label columns outwards from the 0-degree meridian
            cols = [None] * len(longitudes)
            zeroth_idx = np.where(longitudes==0)[0][0]
            cols[zeroth_idx:] = [f'{i}R' for i in range(len(longitudes)-zeroth_idx)]
            cols[:zeroth_idx] = [f'{abs(i-zeroth_idx)}L' for i in range(zeroth_idx)]
            return np.array(cols),np.array(longitudes)

        return np.array(longitudes)

    def get_points(self):
        """
        Build the full point table.

        Returns:
            (gpd.GeoDataFrame, list[gpd.GeoDataFrame]): all grid points with
            their names, row/column labels and indices, UTM zone and EPSG
            code; plus the same data split into one GeoDataFrame per row.
        """
        r_idx = 0
        points_by_row = [None]*len(self.rows)
        for r,lat in zip(self.rows,self.lats):
            point_names,grid_row_names,grid_col_names,grid_row_idx,grid_col_idx,grid_lats,grid_lons,utm_zones,epsgs = [],[],[],[],[],[],[],[],[]
            cols,lons = self.subdivide_circumference(lat,return_cols=True)

            cols,lons = self.filter_longitude(cols,lons)
            c_idx = 0
            for c,lon in zip(cols,lons):
                point_names.append(f'{r}_{c}')
                grid_row_names.append(r)
                grid_col_names.append(c)
                grid_row_idx.append(r_idx)
                grid_col_idx.append(c_idx)
                grid_lats.append(lat)
                grid_lons.append(lon)
                if self.utm_definition == 'bottomleft':
                    utm_zones.append(get_utm_zone_from_latlng([lat,lon]))
                elif self.utm_definition == 'center':
                    # offset by half a cell; dist is km, 111,120 m per degree
                    center_lat = lat + (1000*self.dist/2)/111_120
                    center_lon = lon + (1000*self.dist/2)/(111_120*math.cos(center_lat*math.pi/180))
                    utm_zones.append(get_utm_zone_from_latlng([center_lat,center_lon]))
                else:
                    raise ValueError(f'Invalid utm_definition {self.utm_definition}')
                epsgs.append(f'EPSG:{utm_zones[-1]}')

                c_idx += 1
            points_by_row[r_idx] = gpd.GeoDataFrame({
                        'name':point_names,
                        'row':grid_row_names,
                        'col':grid_col_names,
                        'row_idx':grid_row_idx,
                        'col_idx':grid_col_idx,
                        'utm_zone':utm_zones,
                        'epsg':epsgs
                        },geometry=gpd.points_from_xy(grid_lons,grid_lats))
            r_idx += 1
        points = gpd.GeoDataFrame(pd.concat(points_by_row))
        # points.reset_index(inplace=True,drop=True)
        return points, points_by_row

    def group_points_by_row(self):
        """Split `self.points` into a list of GeoDataFrames, one per row."""
        # Make list of different gdfs for each row
        points_by_row = [None]*len(self.rows)
        for i,row in enumerate(self.rows):
            points_by_row[i] = self.points[self.points.row==row]
        return points_by_row

    def filter_longitude(self,cols,lons):
        """Keep only columns/longitudes inside `self.longitude_range`."""
        idxs = (lons>=self.longitude_range[0]) * (lons<=self.longitude_range[1])
        cols,lons = cols[idxs],lons[idxs]
        return cols,lons

    def latlon2rowcol(self,lats,lons,return_idx=False,integer=False):
        """
        Convert latitude and longitude to row and column number from the grid

        Args:
            lats, lons: sequences of coordinates.
            return_idx (bool): also return the `self.points` table index of
                each matched cell.
            integer (bool): return signed integer row/col numbers instead of
                labels (U/R positive, D/L negative).
        """
        # Always take bottom left corner of grid cell
        rows = np.searchsorted(self.lats,lats)-1

        # Get the possible points of the grid cells at the given latitude
        possible_points = [self.points_by_row[row] for row in rows]

        # For each point, find the rightmost point that is still to the left of the given longitude
        cols = [poss_points.iloc[np.searchsorted(poss_points.geometry.x,lon)-1].col for poss_points,lon in zip(possible_points,lons)]
        rows = self.rows[rows].tolist()

        outputs = [rows, cols]
        if return_idx:
            # Get the table index for self.points with each row,col pair in rows, cols
            idx = [self.points[(self.points.row==row) & (self.points.col==col)].index.values[0] for row,col in zip(rows,cols)]
            outputs.append(idx)

        # return raw numbers
        if integer:
            outputs[0] = [int(el[:-1]) if el[-1] == 'U' else -int(el[:-1]) for el in outputs[0]]
            outputs[1] = [int(el[:-1]) if el[-1] == 'R' else -int(el[:-1]) for el in outputs[1]]

        return outputs

    def rowcol2latlon(self,rows,cols):
        """Convert row/column labels back to the latitude/longitude of each grid point."""
        point_geoms = [self.points.loc[(self.points.row==row) & (self.points.col==col),'geometry'].values[0] for row,col in zip(rows,cols)]
        lats = [point.y for point in point_geoms]
        lons = [point.x for point in point_geoms]
        return lats,lons

    def get_bounded_footprint(self,point,buffer_ratio=0):
        """
        Polygon footprint of the grid cell for `point`, bounded by the
        neighbouring grid points' cells and optionally buffered.

        Args:
            point: a row of the points table (needs `geometry`, `row_idx`,
                `col_idx`).
            buffer_ratio (float): fraction of the cell width/height to expand
                the footprint by on each side.

        Returns:
            shapely.geometry.Polygon: the (possibly buffered) bounding box.
        """
        # Gets the polygon footprint of the grid cell for a given point, bounded by the other grid points' cells.
        # Grid point defined as bottom-left corner of polygon. Buffer ratio is the ratio of the grid cell's width/height to buffer by.

        bottom,left = point.geometry.y,point.geometry.x
        row_idx = point.row_idx
        col_idx = point.col_idx
        next_row_idx = row_idx+1
        next_col_idx = col_idx+1

        if next_row_idx >= len(self.lats): # If at top row, use difference between top and second-to-top row for height
            height = (self.lats[row_idx] - self.lats[row_idx-1])
            top = self.lats[row_idx] + height
        else:
            top = self.lats[next_row_idx]

        max_col = len(self.points_by_row[row_idx].col_idx)-1
        if next_col_idx > max_col: # If at rightmost column, use difference between rightmost and second-to-rightmost column for width
            width = (self.points_by_row[row_idx].iloc[col_idx].geometry.x - self.points_by_row[row_idx].iloc[col_idx-1].geometry.x)
            right = self.points_by_row[row_idx].iloc[col_idx].geometry.x + width
        else:
            right = self.points_by_row[row_idx].iloc[next_col_idx].geometry.x

        # Buffer the polygon by the ratio of the grid cell's width/height
        width = right - left
        height = top - bottom

        buffer_horizontal = width * buffer_ratio
        buffer_vertical = height * buffer_ratio

        new_left = left - buffer_horizontal
        new_right = right + buffer_horizontal

        new_bottom = bottom - buffer_vertical
        new_top = top + buffer_vertical

        bbox = Polygon([(new_left,new_bottom),(new_left,new_top),(new_right,new_top),(new_right,new_bottom)])

        return bbox
205
+
206
def get_utm_zone_from_latlng(latlng):
    """
    Get the UTM zone from a latlng list and return the corresponding EPSG code.

    Zone numbering follows the standard 6-degree bands, with the special
    zone overrides for south-west Norway and Svalbard.

    Parameters
    ----------
    latlng : List[Union[int, float]]
        The latlng list to get the UTM zone from.

    Returns
    -------
    str
        The EPSG code for the UTM zone (326xx north, 327xx south).
    """
    assert isinstance(latlng, (list, tuple)), "latlng must be in the form of a list or tuple."

    latitude, longitude = latlng[0], latlng[1]

    # regular 6-degree UTM band
    zone_number = (math.floor((longitude + 180) / 6)) % 60 + 1

    # Special zones for Svalbard and Norway
    if 56.0 <= latitude < 64.0 and 3.0 <= longitude < 12.0:
        zone_number = 32
    elif 72.0 <= latitude < 84.0:
        for special_zone, (lon_lo, lon_hi) in (
            (31, (0.0, 9.0)),
            (33, (9.0, 21.0)),
            (35, (21.0, 33.0)),
            (37, (33.0, 42.0)),
        ):
            if lon_lo <= longitude < lon_hi:
                zone_number = special_zone
                break

    # Determine the hemisphere and construct the EPSG code
    hemisphere_prefix = "327" if latitude < 0 else "326"
    epsg_code = f"{hemisphere_prefix}{zone_number:02d}"

    # sanity check: zone must land in 01..60
    if not re.match(r"32[6-7](0[1-9]|[1-5][0-9]|60)",epsg_code):
        print(f"latlng: {latlng}, epsg_code: {epsg_code}")
        raise ValueError(f"out of bound latlng resulted in incorrect EPSG code for the point")

    return epsg_code
250
+
251
+
252
if __name__ == '__main__':

    # sanity checks for the UTM-zone EPSG assignment,
    # including the Norway/Svalbard special cases
    assert get_utm_zone_from_latlng([-1,-174.34]) == "32701"
    assert get_utm_zone_from_latlng([48,-4]) == "32630"
    assert get_utm_zone_from_latlng([78,13]) == "32633"
    assert get_utm_zone_from_latlng([-34,19.7]) == "32734"
    assert get_utm_zone_from_latlng([-36,175.7]) == "32760"


    dist = 100
    grid = Grid(dist)

    # random sample of coordinates for a round-trip test
    np.random.seed(0)
    test_lons = np.random.uniform(-20,20,size=(1000)) % 180 # Checks edge-case of crossing 180th meridian
    test_lats = np.random.uniform(-20,68,size=(1000))

    # round-trip: latlon -> grid cell labels -> latlon of the cell corner
    test_rows,test_cols = grid.latlon2rowcol(test_lats,test_lons)
    test_lats2,test_lons2 = grid.rowcol2latlon(test_rows,test_cols)

    print(test_lons[:10])
    print(test_lats[:10])
    print(test_rows[:10])
    print(test_cols[:10])

    # Make line segments from the points to their corresponding grid points
    lines = []
    for i in range(len(test_lats)):
        lines.append([(test_lons[i],test_lats[i]),(test_lons2[i],test_lats2[i])])

    lines = gpd.GeoDataFrame(geometry=gpd.GeoSeries([LineString(line) for line in lines]))

    # export both layers for visual inspection in GIS software
    lines.to_file(f'testlines_{dist}km.geojson',driver='GeoJSON')
    grid.points.to_file(f'testgrid_{dist}km.geojson',driver='GeoJSON')
MajorTOM/metadata_helpers.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pyarrow.parquet as pq
2
+ import pandas as pd
3
+ import geopandas as gpd
4
+ from pathlib import Path
5
+ import urllib.request
6
+ import fsspec
7
+ from fsspec.parquet import open_parquet_file
8
+ from io import BytesIO
9
+ from PIL import Image
10
+ from rasterio.io import MemoryFile
11
+ from tqdm.notebook import tqdm
12
+ import os
13
+
14
+ from .sample_helpers import *
15
+
16
def metadata_from_url(access_url, local_url):
    """Download a Major-TOM metadata parquet and load it as a GeoDataFrame.

    Args:
        access_url (str): Remote URL of the metadata parquet file.
        local_url (str or Path): Local destination path for the download.

    Returns:
        geopandas.GeoDataFrame: Metadata with point geometry built from the
        ``centre_lon``/``centre_lat`` columns and the CRS taken from the
        first row's ``crs`` column.
    """
    downloaded_path, _response = urllib.request.urlretrieve(access_url, local_url)
    frame = pq.read_table(downloaded_path).to_pandas()
    frame['timestamp'] = pd.to_datetime(frame.timestamp)
    centre_points = gpd.points_from_xy(frame.centre_lon, frame.centre_lat)
    return gpd.GeoDataFrame(frame, geometry=centre_points, crs=frame.crs.iloc[0])
24
+
25
def filter_metadata(df,
                    region=None,
                    daterange=None,
                    cloud_cover=(0, 100),
                    nodata=(0, 1.0)
                    ):
    """Filters the Major-TOM dataframe based on several parameters

    Args:
        df (geopandas dataframe): Parent dataframe
        region (shapely geometry object) : Region of interest
        daterange (tuple) : Inclusive range of dates (example format: '2020-01-01')
        cloud_cover (tuple) : Inclusive percentage range (0-100) of cloud cover
        nodata (tuple) : Inclusive fraction (0.0-1.0) of no data allowed in a sample

    Returns:
        df: a filtered dataframe
    """
    # temporal filtering (inclusive on both ends)
    if daterange is not None:
        assert isinstance(daterange, (list, tuple)) and len(daterange) == 2
        df = df[df.timestamp >= daterange[0]]
        df = df[df.timestamp <= daterange[1]]

    # spatial filtering via the spatial index of the geodataframe
    if region is not None:
        idxs = df.sindex.query(region)
        df = df.take(idxs)

    # cloud-cover filtering
    if cloud_cover is not None:
        df = df[df.cloud_cover >= cloud_cover[0]]
        df = df[df.cloud_cover <= cloud_cover[1]]

    # nodata filtering (fixed: this comment was previously mislabelled "spatial filtering")
    if nodata is not None:
        df = df[df.nodata >= nodata[0]]
        df = df[df.nodata <= nodata[1]]

    return df
64
+
65
def read_row(row, columns=["thumbnail"]):
    """Reads a row from a Major-TOM dataframe

    Args:
        row (row from geopandas dataframe): The row of metadata; must carry
            ``parquet_url`` and ``parquet_row`` fields.
        columns (list): columns to be read from the file

    Returns:
        data: a PIL.Image when only the thumbnail is requested, otherwise a
        dictionary with decoded data for every requested column
    """
    # Large footer_sample_size fetches the parquet footer in a single request
    with open_parquet_file(row.parquet_url, columns=columns, footer_sample_size=2000000) as f:
        with pq.ParquetFile(f) as pf:
            row_group = pf.read_row_group(row.parquet_row, columns=columns)

    if columns == ["thumbnail"]:
        stream = BytesIO(row_group['thumbnail'][0].as_py())
        return Image.open(stream)
    else:
        row_output = {}
        for col in columns:
            # renamed from `bytes` to avoid shadowing the builtin
            col_bytes = row_group[col][0].as_py()

            if col != 'thumbnail':
                # band columns are GeoTIFF-encoded
                row_output[col] = read_tif_bytes(col_bytes)
            else:
                stream = BytesIO(col_bytes)
                row_output[col] = Image.open(stream)

        return row_output
94
+
95
def filter_download(df, local_dir, source_name, by_row=False, verbose=False, tif_columns=None):
    """Downloads and unpacks the data of Major-TOM based on a metadata dataframe

    Args:
        df (geopandas dataframe): Metadata dataframe
        local_dir (str or Path) : Path to where the data is to be stored locally
        source_name (str) : Name alias of the resulting dataset
        by_row (bool): If True, it will access individual rows of parquet via
            http - otherwise entire parquets are downloaded temporarily
        verbose (bool) : option for potential internal state printing
        tif_columns (list of str) : Optionally specified columns to be
            downloaded as .tifs, e.g. ['B04', 'B03', 'B02']

    Returns:
        None
    """
    if isinstance(local_dir, str):
        local_dir = Path(local_dir)
    # ensure the destination (also used for the temp file) exists before writing
    local_dir.mkdir(exist_ok=True, parents=True)

    temp_file = local_dir / 'temp.parquet'

    # identify all parquets that need to be downloaded (group them)
    urls = df.parquet_url.unique()
    if verbose:
        print('Starting download of {} parquet files.'.format(len(urls)))

    for url in tqdm(urls, desc='Downloading and unpacking...', disable=not verbose):
        # identify all relevant rows
        rows = df[df.parquet_url == url].parquet_row.unique()

        remote_file = None
        if not by_row:  # (downloads entire parquet)
            temp_path, _ = urllib.request.urlretrieve(url, temp_file)
        else:  # stream only the needed byte ranges over http
            remote_file = fsspec.open(url)
            temp_path = remote_file.open()

        try:
            # populate the bands
            with pq.ParquetFile(temp_path) as pf:
                for row_idx in rows:
                    table = pf.read_row_group(row_idx)

                    product_id = table['product_id'][0].as_py()
                    grid_cell = table['grid_cell'][0].as_py()
                    row = grid_cell.split('_')[0]

                    dest = local_dir / Path("{}/{}/{}/{}".format(source_name, row, grid_cell, product_id))
                    dest.mkdir(exist_ok=True, parents=True)

                    # default to every band column (B*) plus the cloud mask
                    columns = [col for col in table.column_names if col[0] == 'B'] + ['cloud_mask'] if tif_columns is None else tif_columns
                    # tifs
                    for col in columns:
                        with open(dest / "{}.tif".format(col), "wb") as f:
                            # Write bytes to file
                            f.write(table[col][0].as_py())

                    # thumbnail (png)
                    col = 'thumbnail'
                    with open(dest / "{}.png".format(col), "wb") as f:
                        # Write bytes to file
                        f.write(table[col][0].as_py())
        finally:
            # cleanup now also happens when unpacking raises
            if not by_row:
                # remove downloaded file
                os.remove(temp_path)
            else:
                # close both the open byte stream and the fsspec handle
                temp_path.close()
                remote_file.close()
MajorTOM/sample_helpers.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rasterio.io import MemoryFile
2
+ import matplotlib.pyplot as plt
3
+ import numpy as np
4
+ from PIL import Image
5
+ from io import BytesIO
6
+
7
def plot(sample, bands=['B04', 'B03', 'B02'], scaling=2e3):
    """Display selected bands of a Major-TOM sample as an RGB image.

    Args:
        sample (dict): Mapping of band name to GeoTIFF bytes.
        bands (list of str): Bands to stack as image channels (RGB order).
        scaling (float): Divisor applied to the raw band values.
            Fixed: this parameter was previously ignored and 2e3 was
            hard-coded in the division.
    """
    img = [read_tif_bytes(sample[b]) for b in bands]
    plt.imshow(np.stack(img, -1) / scaling)
12
+
13
def read_tif_bytes(tif_bytes):
    """Decode in-memory GeoTIFF bytes into a squeezed numpy array."""
    with MemoryFile(tif_bytes) as memory_file, memory_file.open(driver='GTiff') as dataset:
        return dataset.read().squeeze()
17
+
18
def read_png_bytes(png_bytes):
    """Decode in-memory PNG bytes into a (lazily loaded) PIL image."""
    return Image.open(BytesIO(png_bytes))
README.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: EarthExplorer
3
+ emoji: 🌍
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.9.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ # EarthExplorer
14
+
15
+ A tool for searching satellite images of Earth using natural language descriptions, images, geolocations, or a simple click on the map.
16
+
17
+ ## Features
18
+
19
+ - Text-based satellite image search
20
+ - Image-based similarity search
21
+ - Location-based search
22
+ - Interactive map interface
23
+
24
+ ## Clone
25
+
26
+ ```bash
27
+ git clone https://huggingface.co/spaces/ML4Sustain/EarthExplorer
28
+ ```
Tutorial.md ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Tutorial: EarthEmbeddingExplorer
2
+
3
+ ## Background
4
+
5
+ ### What is this project about?
6
+ EarthEmbeddingExplorer is a tool that lets you search satellite imagery using **natural language**, **images**, or **geographic locations**. In simple terms, you can enter prompts like “a satellite image of a glacier” or “a satellite image of a city with a coastline”, and the system will find places on Earth that match your description and visualize them on a map.
7
+
8
+ EarthEmbeddingExplorer enables users to explore the Earth in multiple ways without leaving their desk, and it can be useful for many geoscience tasks. For example, geologists can quickly locate glacier regions; biologists can rapidly map forest cover; and architects can study urban patterns across different parts of the world.
9
+
10
+ ## How does it work? (Core ideas)
11
+
12
+ ### Satellite imagery dataset
13
+ We use **MajorTOM** (Major TOM: Expandable Datasets for Earth Observation) released by the European Space Agency (ESA) [1]. Specifically, we use the [Core-S2L2A](https://modelscope.cn/datasets/Major-TOM/Core-S2L2A) subset.
14
+
15
+ | Dataset | Imagery source | Number of samples | Sensor type |
16
+ | :--- | :--- | :--- | :--- |
17
+ | MajorTOM-Core-S2L2A | Sentinel-2 Level 2A | 2,245,886 | Multispectral |
18
+
19
+ MajorTOM Core-S2L2A provides global Sentinel-2 multispectral imagery (10 m resolution). We convert the RGB bands into embeddings using CLIP-like models (e.g., SigLIP), which saves substantial time because we do not need to preprocess raw imagery ourselves. In addition, embeddings (vectors) are much smaller than raw imagery, and they are significantly faster to search.
20
+
21
+ To keep EarthEmbeddingExplorer responsive, we build a smaller but representative version of the dataset.
22
+
23
+ The original tiles in Core-S2L2A are large (1068×1068 pixels), but most AI models expect smaller inputs (384×384 or 224×224 pixels).
24
+ 1. **Cropping**: for simplicity, from each original tile we only take the **center** 384×384 (or 224×224) crop to generate an embedding.
25
+ 2. **Uniform sampling**: using MajorTOM’s grid coding system, we sample **1%** of the data (about 22,000 images). This preserves global coverage while keeping search fast.
26
+
27
+ <div align="center">
28
+ <img src="images/samples.png" width="50%" />
29
+ <br>
30
+ <em>Figure 1: Geographic distribution of our sampled satellite image embeddings.</em>
31
+ </div>
32
+
33
+ ### Retrieval models
34
+ The core of image retrieval is a family of models known as **CLIP (Contrastive Language-Image Pre-training)** [2]. We use its improved variants such as **SigLIP (Sigmoid Language-Image Pre-training)** [3], **FarSLIP (Fine-grained Aligned Remote Sensing Language Image Pretraining)** [4], and **SatCLIP (Satellite Location-Image Pretraining)** [5].
35
+
36
+ An analogy: when teaching a child, you show a picture of a glacier and say “glacier”. After seeing many examples, the child learns to associate the visual concept with the word.
37
+
38
+ CLIP-like models learn in a similar way, but at much larger scale.
39
+ - An image encoder turns an **image** into an **embedding** (a vector of numbers).
40
+ - A text (or location) encoder turns **text** (or **latitude/longitude**) into an embedding.
41
+
42
+ The key property is: if an image matches a text description (or location), their embeddings will be close; otherwise they will be far apart.
43
+
44
+ <div align="center">
45
+ <img src="images/CLIP.png" width="40%" />
46
+ <br>
47
+ <em>Figure 2: How CLIP-like models connect images and text.</em>
48
+ </div>
49
+
50
+ The three models we use differ in their encoders and training data:
51
+
52
+ | Model | Encoder type | Training data |
53
+ | :--- | :--- | :--- |
54
+ | SigLIP | image encoder + text encoder | natural image–text pairs from the web |
55
+ | FarSLIP | image encoder + text encoder | satellite image–text pairs |
56
+ | SatCLIP | image encoder + location encoder | satellite image–location pairs |
57
+
58
+ <div align="center">
59
+ <img src="images/embedding.png" width="30%" />
60
+ <br>
61
+ <em>Figure 3: Converting satellite images into embedding vectors.</em>
62
+ </div>
63
+
64
+ In EarthEmbeddingExplorer:
65
+ 1. We precompute embeddings for ~22k globally distributed satellite images using SigLIP, FarSLIP, and SatCLIP.
66
+ 2. When you provide a query (text like “a satellite image of glacier”, an image, or a location such as (-89, 120)), we encode the query into an embedding using the corresponding encoder.
67
+ 3. We compare the query embedding with all image embeddings, visualize similarities on a map, and show the top-5 most similar images.
68
+
69
+ ## System architecture
70
+
71
+ <div align="center">
72
+ <img src="images/framework_en.png" width="70%" />
73
+ <br>
74
+ <em>Figure 4: EarthEmbeddingExplorer system architecture on ModelScope.</em>
75
+ </div>
76
+
77
+ We deploy EarthEmbeddingExplorer on ModelScope: the models, embedding datasets, and raw imagery datasets are all hosted on the platform. The app runs on [xGPU](https://www.modelscope.cn/brand/view/xGPU), allowing flexible access to GPU resources and faster retrieval.
78
+
79
+ ### How is the raw imagery stored?
80
+
81
+ MajorTOM Core-S2L2A is large (about 23 TB), so we do not download the full dataset. Instead, the raw imagery is stored as **Parquet shards**:
82
+
83
+ - **Shard storage**: the dataset is split into many remote Parquet files (shards), each containing a subset of the samples.
84
+ - **Columnar storage**: different fields/bands (e.g., B04/B03/B02, thumbnail) are stored as separate columns; we only read what we need.
85
+ - **Metadata index**: we maintain a small index table mapping `product_id → (parquet_url, parquet_row)` so the system can locate “which shard and which position” contains a given image.
86
+
87
+ With this design, when a user only needs a small number of images from the retrieval results, the system can use **HTTP Range requests** to download only a small byte range from a Parquet file (the target row/row group and the requested columns), rather than downloading the full 23 TB dataset—enabling near real-time retrieval of raw images.
88
+
89
+ ### What happens when you use the app?
90
+
91
+ 1. **Enter a query**: you can enter text, upload an image, or input a latitude/longitude. You can also click on the map to use the clicked location as a query.
92
+ 2. **Compute similarity**: the app encodes your query into an embedding vector and computes similarity scores against all satellite image embeddings.
93
+ 3. **Show results**: the system filters out low-similarity results and shows the highest-scoring locations (and scores) on the map. You can adjust the threshold using a slider.
94
+ 4. **Download raw images on demand**: for the top-5 most similar images, the system looks up their `parquet_url` and row position via the metadata index, then uses HTTP Range to fetch only the required data (RGB bands) and displays the images quickly in the UI.
95
+
96
+ ## Examples
97
+ <div align="center">
98
+ <img src="images/Text_Search.jpg" width="99%" />
99
+ <br>
100
+ <em>Figure 5: Search by text.</em>
101
+ </div>
102
+ <br>
103
+
104
+ <div align="center">
105
+ <img src="images/Image_Search_Amazon.jpg" width="99%" />
106
+ <br>
107
+ <em>Figure 6: Search by image.</em>
108
+ </div>
109
+ <br>
110
+
111
+ <div align="center">
112
+ <img src="images/Location_Search_Amazon.jpg" width="99%" />
113
+ <br>
114
+ <em>Figure 7: Search by location.</em>
115
+ </div>
116
+
117
+ ## Limitations
118
+
119
+ While EarthEmbeddingExplorer has strong potential, it also has limitations. SigLIP is primarily trained on “natural images” from the internet (people, pets, cars, everyday objects) rather than satellite imagery. This domain gap can make it harder for the model to understand certain scientific terms or distinctive geographic patterns that are uncommon in typical web photos.
120
+
121
+ FarSLIP may perform poorly on non-remote-sensing concepts described in text, such as queries like “an image of face”.
122
+
123
+ ## Acknowledgements
124
+
125
+ We thank the following open-source projects and datasets that made EarthEmbeddingExplorer possible:
126
+
127
+ **Models:**
128
+ - [SigLIP](https://huggingface.co/timm/ViT-SO400M-14-SigLIP-384) - Vision Transformer model for image-text alignment
129
+ - [FarSLIP](https://github.com/NJU-LHRS/FarSLIP) - Fine-grained satellite image-text pretraining model
130
+ - [SatCLIP](https://github.com/microsoft/satclip) - Satellite location-image pretraining model
131
+
132
+ **Datasets:**
133
+ - [MajorTOM](https://github.com/ESA-PhiLab/MajorTOM) - Expandable datasets for Earth observation by ESA
134
+
135
+ We are grateful to the research communities and organizations that developed and shared these resources.
136
+
137
+ ## Contributors
138
+ - [Yijie Zheng](https://voyagerxvoyagerx.github.io/)
139
+ - [Weijie Wu](https://github.com/go-bananas-wwj)
140
+ - [Bingyue Wu](https://brynn-wu.github.io/Brynn-Wu)
141
+
142
+ ## Roadmap
143
+ - [ ] Increase the geographical coverage (sample rate) to 1.2% of the Earth's land surface. (coming by 16 Jan!)
144
+ - [ ] Support DINOv2 Embedding model and embedding datasets.
145
+ - [ ] Support FAISS for faster similarity search.
146
+ - [ ] What features do you want? Leave an issue [here](https://huggingface.co/spaces/ML4Sustain/EarthExplorer/discussions)!
147
+
148
+ We warmly welcome new contributors!
149
+
150
+ ## References
151
+
152
+ [1] Francis, A., & Czerkawski, M. (2024). Major TOM: Expandable Datasets for Earth Observation. IGARSS 2024.
153
+
154
+ [2] Radford, A., et al. (2021). Learning Transferable Visual Models From Natural Language Supervision. ICML 2021.
155
+
156
+ [3] Zhai, X., et al. (2023). Sigmoid Loss for Language-Image Pre-Training. ICCV 2023.
157
+
158
+ [4] Li, Z., et al. (2025). FarSLIP: Discovering Effective CLIP Adaptation for Fine-Grained Remote Sensing Understanding. arXiv 2025.
159
+
160
+ [5] Klemmer, K. et al. (2025). SatCLIP: Global, General-Purpose Location Embeddings with Satellite Imagery. AAAI 2025.
161
+
162
+ [6] Czerkawski, M., Kluczek, M., & Bojanowski, J. S. (2024). Global and Dense Embeddings of Earth: Major TOM Floating in the Latent Space. arXiv preprint arXiv:2412.05600.
Tutorial_zh.md ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 教程:EarthExplorer 地球探索者
2
+
3
+ ## 背景介绍
4
+
5
+ ### 这个项目是做什么的?
6
+ EarthExplorer 是一个可以通过**自然语言**,**图像**,或**地理位置**搜索卫星图像的工具。简单来说,你可以输入像“a satellite image of glacier”或“a satellite image of city with a coastline”这样的描述,系统就会在地球上找到符合你描述的地点,并将它们在地图上展示出来。EarthExplorer 可以让用户足不出户地,以多种方式探索地球上的每一个角落,在地理科学领域有广泛的应用价值。例如,地质学家们可以用这个工具来快速寻找冰川的分布;生物学家可以快速进行森林覆盖的制图,建筑学家们可以研究世界不同地区的城市发展结构。
7
+
8
+
9
+ ## 它是如何工作的?(核心原理)
10
+
11
+ ### 卫星影像数据集
12
+ 我们使用了欧空局(ESA)发布的 **MajorTOM** (Major TOM: Expandable Datasets for Earth Observation) 数据集 [1]。具体来说,我们使用的是 [Core-S2L2A](https://modelscope.cn/datasets/Major-TOM/Core-S2L2A) 这个子集。
13
+
14
+ | 数据集 | 影像来源 | 嵌入数量 | 传感器类型 |
15
+ | :--- | :--- | :--- | :--- |
16
+ | MajorTOM-Core-S2L2A | Sentinel-2 Level 2A | 2,245,886 | 多光谱 |
17
+
18
+ MajorTOM Core-S2L2A 包含了全球覆盖的 Sentinel-2 多光谱影像(10m 分辨率);我们利用 SigLIP 等模型将该数据集的 RGB 波段处理成了嵌入。这为我们节省了大量时间,因为我们不需要自己去处理这些原始图像!此外,图像嵌入(一串数字)的存储空间远小于原始图像,计算效率也更高!
19
+
20
+ 为了让 EarthExplorer 响应迅速,我们创建了一个更小、更有代表性的数据集版本。
21
+
22
+ Core-S2L2A 中的原始卫星图像尺寸很大(1068x1068 像素),但 AI 模型需要较小的输入尺寸(384x384 或 224x224 像素)。
23
+ 1. **裁剪**:为了简化,对每个原尺寸图像,我们仅选取大图正中心的 384x384 或 224x224 像素区域所生成的嵌入。
24
+ 2. **随机采样**:我们根据 MajorTOM 的网格编码系统,均匀采样了 **1%** 的数据(约 22000 张图像)。这样既能保证全球覆盖,又可以在很短的时间内检索出结果。
25
+
26
+ <div align="center">
27
+ <img src="images/samples.png" width="50%" />
28
+ <br>
29
+ <em>图 1:我们采样的卫星图像嵌入的地理分布。</em>
30
+ </div>
31
+
32
+ ### 检索模型
33
+ 图像检索核心技术是一种叫做 **CLIP (Contrastive Language-Image Pre-training)** [2] 的人工智能模型,我们使用的是它的改进版本 **SigLIP (Sigmoid Language-Image Pre-training)** [3], **FarSLIP (Fine-grained Aligned Remote Sensing Language Image Pretraining)** [4], 和 **SatCLIP (Satellite Location-Image Pretraining)** [5]。
34
+
35
+ 想象一下教小孩子识物。你给他们看一张冰川的照片,并说“冰川”。在看了很多冰川的照片并听到这个词后,孩子就学会了将冰川的样子和“冰川”这个词联系起来。
36
+
37
+ SigLIP/FarSLIP/SatCLIP 的工作原理类似,但规模要大得多。它在学习了数百万个图片-文字对或图片-地理位置对,从而理解了图像和文本/地理位置之间的关系。
38
+ - 它使用图片编码器将**图像**转换成一种数学表示(一串数字),我们称之为**嵌入 (Embedding)**。
39
+ - 它也使用文本/地理位置编码器将**文本**或**地理位置(经纬度坐标)**转换成类似的数学表示(嵌入)。
40
+
41
+ 神奇之处在于,如果一张图片和一段文字描述或经纬度是匹配的,它们转换后的数学表示就会非常接近。如果不匹配,它们就会相距很远。
42
+
43
+ <div align="center">
44
+ <img src="images/CLIP.png" width="40%" />
45
+ <br>
46
+ <em>图 2:CLIP 类模型如何连接图像和文本/位置。</em>
47
+ </div>
48
+
49
+ 我们用到的三个模型的模型结构和训练数据是:
50
+ | 模型 | 编码器类型 | 训练数据来源 |
51
+ | :--- | :--- | :--- |
52
+ | SigLIP | 图像编码器+文本编码器 | 互联网上的自然图像-文本对 |
53
+ | FarSLIP | 图像编码器+文本编码器 | 卫星图像-文本对 |
54
+ | SatCLIP | 图像编码器+位置编码器 | 卫星图像-地理位置对 |
55
+
56
+ <div align="center">
57
+ <img src="images/embedding.png" width="30%" />
58
+ <br>
59
+ <em>图 3:将卫星图像转换成嵌入向量。</em>
60
+ </div>
61
+
62
+ 在 EarthExplorer 中:
63
+ 1. 我们将全球均匀采样的两万多张卫星图像,分别使用 SigLIP, FarSLIP, 和 SatCLIP 的图像编码器,转换成了这种数学“嵌入”。
64
+ 2. 当你输入一个查询,这个查询可以是文本(例如“a satellite image of glacier”),图像(一张冰川的图像),或地理位置(-89, 120),我们将你的查询也使用对应的编码器转换成嵌入。
65
+ 3. 然后,我们将你的查询嵌入与所有卫星图像的嵌入进行比较,将相似度在地图上可视化,并展示最相似的5张图像。
66
+
67
+
68
+ ## 系统架构
69
+
70
+ <div align="center">
71
+ <img src="images/framework_zh.png" width="70%" />
72
+ <br>
73
+ <em>图 4:基于魔搭创空间的 EarthExplorer 系统架构。</em>
74
+ </div>
75
+
76
+ 我们基于魔搭平台进行部署:模型、嵌入数据集、以及原始影像数据集都托管在魔搭上。我们将 APP 部署在 [xGPU](https://www.modelscope.cn/brand/view/xGPU) 环境下,使得用户可以获得灵活调度的免费 GPU 资源,加快检索速度。
77
+
78
+ ### 原始影像是如何存的?
79
+
80
+ MajorTOM Core-S2L2A 的原始影像体量很大(约 23TB),以 **Parquet 分片(shard)** 的方式存储:
81
+
82
+ - **分片存储**:数据被拆成很多个远端 Parquet 文件(分片),每个分片只包含一部分影像样本。
83
+ - **列式存储**:每个影像的不同字段/波段(例如 B04/B03/B02、thumbnail)存成不同的列,需要什么就读什么。
84
+ - **元数据索引**:我们额外维护一份很小的索引表,把 `product_id → (parquet_url, parquet_row)` 对应起来,告诉系统“这个 id 的影像在哪个分片、在分片里的哪个位置”。
85
+
86
+ 这样,当用户只需要查看检索结果的少量影像时,系统可以通过 **HTTP Range 请求**只下载 Parquet 文件中“那一小段字节”(对应目标行/行组 + 指定列的数据),而不是下载整个 23TB 数据集,从而实现秒级取图。
87
+
88
+ ### 当你使用这个 App 时
89
+
90
+ 1. **输入查询**:你可以输入文字、上传图片、输入经纬度;也可以在地图上点击一个位置,直接把该点经纬度作为查询。
91
+ 2. **计算相似度**:App 将你的查询编码成一个“嵌入向量”,并与嵌入数据集中每一张卫星图像的嵌入计算相似度分数。
92
+ 3. **展示检索结果**:系统过滤掉相似度较低的结果,把相似度最高的地点(以及分数)显示在地图上;你可以用滑动条调整阈值。
93
+ 4. **按需下载原图**:对最相似的前 5 张影像,系统用 `product_id` 查询元数据索引定位到远端 `parquet_url` 和行位置,然后通过 HTTP Range 只拉取对应缩略图数据,在前端快速展示原始影像。
94
+
95
+
96
+ ## 示例
97
+ <div align="center">
98
+ <img src="images/Text_Search.jpg" width="99%" />
99
+ <br>
100
+ <em>图 5:以文搜图示例。</em>
101
+ </div>
102
+ <br>
103
+
104
+ <div align="center">
105
+ <img src="images/Image_Search_Amazon.jpg" width="99%" />
106
+ <br>
107
+ <em>图 6:以图搜图示例。</em>
108
+ </div>
109
+
110
+ <br>
111
+ <div align="center">
112
+ <img src="images/Location_Search_Amazon.jpg" width="99%" />
113
+ <br>
114
+ <em>图 7:以点搜图示例。</em>
115
+ </div>
116
+
117
+
118
+ ## 局限性
119
+
120
+ 虽然 EarthExplorer 有很大的应用潜力,但它也有一些局限性。SigLIP 模型主要是通过互联网上的“自然图像”(如人物、猫狗、汽车、日常用品的照片)训练的,而不是专门针对卫星图像训练的。这种训练数据和应用时数据的偏差,使得模型可能难以理解特定的科学术语或在普通网络照片中不常见的独特地理特征。而 FarSLIP 模型对非典型遥感地物的语言描述,例如 'an image of face' 的检索效果不佳。
121
+
122
+ 未来的工作可以使用其他专门针对地球观测数据训练的 AI 模型来提高检索的准确性。
123
+
124
+ ## 未来工作
125
+ - 结合时间序列影像,实现全球变化监测
126
+ - 添加不同地球基础模型,对比不同模型的检索性能
127
+
128
+ ## 致谢
129
+ 我们感谢以下开源项目和数据集,它们使 EarthExplorer 得以实现:
130
+
131
+ **模型:**
132
+ - [SigLIP](https://huggingface.co/timm/ViT-SO400M-14-SigLIP-384) - 用于图像-文本对齐的视觉Transformer模型
133
+ - [FarSLIP](https://github.com/NJU-LHRS/FarSLIP) - 细粒度卫星图像-文本预训练模型
134
+ - [SatCLIP](https://github.com/microsoft/satclip) - 卫星位置-图像预训练模型
135
+
136
+ **数据集:**
137
+ - [MajorTOM](https://github.com/ESA-PhiLab/MajorTOM) - 欧洲航天局(ESA)的可扩展地球观测数据集
138
+
139
+ 我们感谢开发和分享这些资源的研究社区和组织。
140
+
141
+ ## 贡献者
142
+ - [郑祎杰](https://voyagerxvoyagerx.github.io/)
143
+ - [伍炜杰](https://github.com/go-bananas-wwj)
144
+ - [吴冰玥](https://brynn-wu.github.io/Brynn-Wu)
145
+
146
+ ## 引用
147
+ [1] Francis, A., & Czerkawski, M. (2024). Major TOM: Expandable Datasets for Earth Observation. IGARSS 2024.
148
+
149
+ [2] Radford, A., et al. (2021). Learning Transferable Visual Models From Natural Language Supervision. ICML 2021.
150
+
151
+ [3] Zhai, X., et al. (2023). Sigmoid Loss for Language-Image Pre-Training. ICCV 2023.
152
+
153
+ [4] Li, Z., et al. (2025). FarSLIP: Discovering Effective CLIP Adaptation for Fine-Grained Remote Sensing Understanding. arXiv 2025.
154
+
155
+ [5] Klemmer, K. et al. (2025). SatCLIP: Global, General-Purpose Location Embeddings with Satellite Imagery. AAAI 2025.
156
+
157
+ [6] Czerkawski, M., Kluczek, M., & Bojanowski, J. S. (2024). Global and Dense Embeddings of Earth: Major TOM Floating in the Latent Space. arXiv preprint arXiv:2412.05600.
app.py ADDED
@@ -0,0 +1,792 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import time
4
+ import os
5
+ import tempfile
6
+ import zipfile
7
+ import numpy as np
8
+ import pandas as pd
9
+ from concurrent.futures import ThreadPoolExecutor, as_completed
10
+
11
+ # Import custom modules
12
+ from models.siglip_model import SigLIPModel
13
+ from models.satclip_model import SatCLIPModel
14
+ from models.farslip_model import FarSLIPModel
15
+ from models.load_config import load_and_process_config
16
+ from visualize import format_results_for_gallery, plot_top5_overview, plot_location_distribution, plot_global_map_static, plot_geographic_distribution
17
+ from data_utils import download_and_process_image, get_esri_satellite_image, get_placeholder_image
18
+ from PIL import Image as PILImage
19
+ from PIL import ImageDraw, ImageFont
20
+
21
# Configuration
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on device: {device}")

# Load and process configuration
config = load_and_process_config()

# Initialize Models
print("Initializing models...")
models = {}

def _init_model(name, model_cls, config_key, config_kwargs):
    """Instantiate one retrieval model into `models`, reading paths from
    `config` when that section exists; a failure is logged, not fatal."""
    try:
        if config and config_key in config:
            kwargs = {kw: config[config_key].get(kw) for kw in config_kwargs}
            models[name] = model_cls(device=device, **kwargs)
        else:
            models[name] = model_cls(device=device)
    except Exception as e:
        # One broken checkpoint should not take down the whole app
        print(f"Failed to load {name}: {e}")

# Consolidates the three previously copy-pasted try/except init blocks
_init_model('SigLIP', SigLIPModel, 'siglip',
            ['ckpt_path', 'tokenizer_path', 'embedding_path'])
_init_model('SatCLIP', SatCLIPModel, 'satclip',
            ['ckpt_path', 'embedding_path'])
_init_model('FarSLIP', FarSLIPModel, 'farslip',
            ['ckpt_path', 'model_name', 'embedding_path'])
72
+
73
def get_active_model(model_name):
    """Look up a loaded model by name.

    Returns:
        tuple: ``(model, None)`` when loaded, ``(None, error_message)`` otherwise.
    """
    if model_name in models:
        return models[model_name], None
    return None, f"Model {model_name} not loaded."
77
+
78
def combine_images(img1, img2):
    """Stack two PIL images vertically on a white canvas, scaling both to a
    common width. Either argument may be None, in which case the other image
    is returned unchanged."""
    if img1 is None:
        return img2
    if img2 is None:
        return img1

    target_w = max(img1.size[0], img2.size[0])

    def _scaled_height(img):
        # preserve aspect ratio when widening to target_w
        w, h = img.size
        return int(h * target_w / w)

    top = img1.resize((target_w, _scaled_height(img1)))
    bottom = img2.resize((target_w, _scaled_height(img2)))

    canvas = PILImage.new('RGB', (target_w, top.size[1] + bottom.size[1]), (255, 255, 255))
    canvas.paste(top, (0, 0))
    canvas.paste(bottom, (0, top.size[1]))
    return canvas
97
+
98
def create_text_image(text, size=(384, 384)):
    """Render a comma-separated text query onto a light-grey placeholder image.

    Args:
        text (str): Query text; comma-separated parts are drawn on separate lines.
        size (tuple): (width, height) of the generated image.

    Returns:
        PIL.Image.Image: The rendered placeholder image.
    """
    img = PILImage.new('RGB', size, color=(240, 240, 240))
    d = ImageDraw.Draw(img)

    # Try to load a scalable font; fall back to PIL's built-in bitmap font
    try:
        # Try to find a font that supports larger size
        font = ImageFont.truetype("DejaVuSans.ttf", 40)
    except OSError:
        # was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit;
        # ImageFont.truetype raises OSError when the font file cannot be read
        font = ImageFont.load_default()

    # Wrap text simply: one comma-separated chunk per line
    margin = 20
    offset = 100
    for line in text.split(','):
        d.text((margin, offset), line.strip(), font=font, fill=(0, 0, 0))
        offset += 50

    d.text((margin, offset + 50), "Text Query", font=font, fill=(0, 0, 255))
    return img
118
+
119
def fetch_top_k_images(top_indices, probs, df_embed, query_text=None):
    """
    Fetches top-k images using actual dataset download (ModelScope) via download_and_process_image.
    """
    results = []

    # Download candidates concurrently; ranking is restored by the final sort
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_to_idx = {
            executor.submit(
                download_and_process_image,
                df_embed.iloc[idx]['product_id'],
                df_source=df_embed,
                verbose=False,
            ): idx
            for idx in top_indices
        }

        for future in as_completed(future_to_idx):
            idx = future_to_idx[future]
            try:
                img_384, img_full = future.result()
                row = df_embed.iloc[idx]

                if img_384 is None:
                    # Fallback to Esri if download fails
                    print(f"Download failed for idx {idx}, falling back to Esri...")
                    img_384 = get_esri_satellite_image(
                        row['centre_lat'], row['centre_lon'],
                        score=probs[idx], rank=0, query=query_text,
                    )
                    img_full = img_384

                results.append({
                    'image_384': img_384,
                    'image_full': img_full,
                    'score': probs[idx],
                    'lat': row['centre_lat'],
                    'lon': row['centre_lon'],
                    'id': row['product_id'],
                })
            except Exception as e:
                print(f"Error fetching image for idx {idx}: {e}")

    # Sort results by score descending (since futures complete in random order)
    results.sort(key=lambda x: x['score'], reverse=True)
    return results
163
+
164
def get_all_results_metadata(model, filtered_indices, probs):
    """Build score-sorted metadata records for every retained result.

    Args:
        model: Object exposing a ``df_embed`` dataframe with columns
            ``product_id``, ``centre_lat`` and ``centre_lon``.
        filtered_indices: Indices into ``model.df_embed`` that survived
            thresholding.
        probs: Similarity score per dataframe row.

    Returns:
        list[dict]: Records with keys ``id``/``lat``/``lon``/``score``,
        ordered by descending score; empty list when nothing survived.
    """
    if len(filtered_indices) == 0:
        return []

    # Order the surviving indices from best to worst score
    order_desc = np.argsort(probs[filtered_indices])[::-1]
    ranked_indices = filtered_indices[order_desc]

    records = model.df_embed.iloc[ranked_indices].copy()
    records['score'] = probs[ranked_indices]

    records = records.rename(
        columns={'product_id': 'id', 'centre_lat': 'lat', 'centre_lon': 'lon'}
    )

    return records[['id', 'lat', 'lon', 'score']].to_dict('records')
182
+
183
def search_text(query, threshold, model_name):
    """Run the full text-query retrieval pipeline, streaming UI updates.

    Generator wired to 7 Gradio outputs; every ``yield`` is a 7-tuple of
    (interactive map, gallery items, status text, results figure,
    downloadable-figures state, filtered-dataframe state, static map image).

    Args:
        query: Free-text description to embed and search with.
        threshold: Slider value in per-mille (‰); threshold/1000 is the
            fraction of the archive kept as matches.
        model_name: Registry key of the embedding model ("SigLIP"/"FarSLIP").
    """
    model, error = get_active_model(model_name)
    if error:
        yield None, None, error, None, None, None, None
        return

    if not query:
        yield None, None, "Please enter a query.", None, None, None, None
        return

    try:
        timings = {}

        # 1. Encode Text
        yield None, None, "Encoding text...", None, None, None, None
        t0 = time.time()
        text_features = model.encode_text(query)
        timings['Encoding'] = time.time() - t0

        if text_features is None:
            yield None, None, "Model does not support text encoding or is not initialized.", None, None, None, None
            return

        # 2. Search
        yield None, None, "Encoding text... ✓\nRetrieving similar images...", None, None, None, None
        t0 = time.time()
        # Slider is in per-mille, so divide by 1000 to get a fraction.
        probs, filtered_indices, top_indices = model.search(text_features, top_percent=threshold/1000.0)
        timings['Retrieval'] = time.time() - t0

        if probs is None:
            yield None, None, "Search failed (embeddings missing?).", None, None, None, None
            return

        # Show geographic distribution (not timed)
        df_embed = model.df_embed
        geo_dist_map, df_filtered = plot_geographic_distribution(df_embed, probs, threshold/1000.0, title=f'Similarity to "{query}" ({model_name})')

        # 3. Download Images — only the 10 best matches are fetched.
        yield gr.update(visible=False), None, "Encoding text... ✓\nRetrieving similar images... ✓\nDownloading images...", None, None, df_filtered, gr.update(value=geo_dist_map, visible=True)
        t0 = time.time()
        top_indices = top_indices[:10]
        results = fetch_top_k_images(top_indices, probs, df_embed, query_text=query)
        timings['Download'] = time.time() - t0

        # 4. Visualize - keep geo_dist_map visible
        yield gr.update(visible=False), None, "Encoding text... ✓\nRetrieving similar images... ✓\nDownloading images... ✓\nGenerating visualizations...", None, None, df_filtered, gr.update(value=geo_dist_map, visible=True)
        t0 = time.time()
        fig_results = plot_top5_overview(None, results, query_info=query)
        gallery_items = format_results_for_gallery(results)
        timings['Visualization'] = time.time() - t0

        # 5. Generate Final Status
        timing_str = f"Encoding {timings['Encoding']:.1f}s, Retrieval {timings['Retrieval']:.1f}s, Download {timings['Download']:.1f}s, Visualization {timings['Visualization']:.1f}s\n\n"
        # threshold/100.0 feeds generate_status_msg's `{threshold*100:.0f}‰`
        # format, so the displayed number equals the raw slider value in ‰.
        status_msg = timing_str + generate_status_msg(len(filtered_indices), threshold/100.0, results)

        all_results = get_all_results_metadata(model, filtered_indices, probs)
        results_txt = format_results_to_text(all_results)

        yield gr.update(visible=False), gallery_items, status_msg, fig_results, [geo_dist_map, fig_results, results_txt], df_filtered, gr.update(value=geo_dist_map, visible=True)

    except Exception as e:
        import traceback
        traceback.print_exc()
        yield None, None, f"Error: {str(e)}", None, None, None, None
248
def search_image(image_input, threshold, model_name):
    """Run the image-query retrieval pipeline, streaming UI updates.

    Mirrors search_text() but encodes an uploaded PIL image and downloads
    only the top-6 matches. Each ``yield`` is a 7-tuple for the same Gradio
    outputs: (interactive map, gallery items, status text, results figure,
    downloadable-figures state, filtered-dataframe state, static map image).

    Args:
        image_input: PIL image from the Gradio image widget (None if unset).
        threshold: Slider value in per-mille (‰).
        model_name: Registry key of the embedding model.
    """
    model, error = get_active_model(model_name)
    if error:
        yield None, None, error, None, None, None, None
        return

    if image_input is None:
        yield None, None, "Please upload an image.", None, None, None, None
        return

    try:
        timings = {}

        # 1. Encode Image
        yield None, None, "Encoding image...", None, None, None, None
        t0 = time.time()
        image_features = model.encode_image(image_input)
        timings['Encoding'] = time.time() - t0

        if image_features is None:
            yield None, None, "Model does not support image encoding.", None, None, None, None
            return

        # 2. Search
        yield None, None, "Encoding image... ✓\nRetrieving similar images...", None, None, None, None
        t0 = time.time()
        # Slider is in per-mille, so divide by 1000 to get a fraction.
        probs, filtered_indices, top_indices = model.search(image_features, top_percent=threshold/1000.0)
        timings['Retrieval'] = time.time() - t0

        # Show geographic distribution (not timed)
        df_embed = model.df_embed
        geo_dist_map, df_filtered = plot_geographic_distribution(df_embed, probs, threshold/1000.0, title=f'Similarity to Input Image ({model_name})')

        # 3. Download Images — only the 6 best matches are fetched.
        yield gr.update(visible=False), None, "Encoding image... ✓\nRetrieving similar images... ✓\nDownloading images...", None, None, df_filtered, gr.update(value=geo_dist_map, visible=True)
        t0 = time.time()
        top_indices = top_indices[:6]
        results = fetch_top_k_images(top_indices, probs, df_embed, query_text="Image Query")
        timings['Download'] = time.time() - t0

        # 4. Visualize - keep geo_dist_map visible
        yield gr.update(visible=False), None, "Encoding image... ✓\nRetrieving similar images... ✓\nDownloading images... ✓\nGenerating visualizations...", None, None, df_filtered, gr.update(value=geo_dist_map, visible=True)
        t0 = time.time()
        fig_results = plot_top5_overview(image_input, results, query_info="Image Query")
        gallery_items = format_results_for_gallery(results)
        timings['Visualization'] = time.time() - t0

        # 5. Generate Final Status
        timing_str = f"Encoding {timings['Encoding']:.1f}s, Retrieval {timings['Retrieval']:.1f}s, Download {timings['Download']:.1f}s, Visualization {timings['Visualization']:.1f}s\n\n"
        status_msg = timing_str + generate_status_msg(len(filtered_indices), threshold/100.0, results)

        all_results = get_all_results_metadata(model, filtered_indices, probs)
        # Cap the downloadable text report at 50 rows.
        results_txt = format_results_to_text(all_results[:50])

        yield gr.update(visible=False), gallery_items, status_msg, fig_results, [geo_dist_map, fig_results, results_txt], df_filtered, gr.update(value=geo_dist_map, visible=True)

    except Exception as e:
        import traceback
        traceback.print_exc()
        yield None, None, f"Error: {str(e)}", None, None, None, None
308
+
309
def search_location(lat, lon, threshold):
    """Run the location-query retrieval pipeline (SatCLIP), streaming UI updates.

    Encodes (lat, lon) with SatCLIP's location encoder, retrieves the most
    similar tiles, and yields 7-tuples for the same Gradio outputs as
    search_text()/search_image().

    Args:
        lat, lon: Query coordinates in degrees (WGS84).
        threshold: Slider value in per-mille (‰).
    """
    model_name = "SatCLIP"
    model, error = get_active_model(model_name)
    if error:
        yield None, None, error, None, None, None, None
        return

    try:
        timings = {}

        # 1. Encode Location
        yield None, None, "Encoding location...", None, None, None, None
        t0 = time.time()
        loc_features = model.encode_location(float(lat), float(lon))
        timings['Encoding'] = time.time() - t0

        if loc_features is None:
            yield None, None, "Location encoding failed.", None, None, None, None
            return

        # 2. Search
        yield None, None, "Encoding location... ✓\nRetrieving similar images...", None, None, None, None
        t0 = time.time()
        # Slider is per-mille: use /1000.0 like the text/image handlers so the
        # match set agrees with the plotted distribution (was /100.0, which
        # kept 10x more matches than the map displayed).
        probs, filtered_indices, top_indices = model.search(loc_features, top_percent=threshold/1000.0)
        timings['Retrieval'] = time.time() - t0

        # 3. Generate Distribution Map (not timed)
        yield None, None, "Encoding location... ✓\nRetrieving similar images... ✓\nGenerating distribution map...", None, None, None, None
        df_embed = model.df_embed
        geo_dist_map, df_filtered = plot_geographic_distribution(df_embed, probs, threshold/1000.0, title=f'Similarity to Location ({lat}, {lon})')

        # 4. Download Images — only the 6 best matches are fetched.
        yield gr.update(visible=False), None, "Encoding location... ✓\nRetrieving similar images... ✓\nGenerating distribution map... ✓\nDownloading images...", None, None, df_filtered, gr.update(value=geo_dist_map, visible=True)
        t0 = time.time()
        top_6_indices = top_indices[:6]
        results = fetch_top_k_images(top_6_indices, probs, df_embed, query_text=f"Loc: {lat},{lon}")

        # Query tile: show the catalogued tile nearest to the query point,
        # falling back to a grey placeholder if the download fails.
        query_tile = None
        try:
            lats = pd.to_numeric(df_embed['centre_lat'], errors='coerce')
            lons = pd.to_numeric(df_embed['centre_lon'], errors='coerce')
            dists = (lats - float(lat))**2 + (lons - float(lon))**2
            nearest_idx = dists.idxmin()
            pid = df_embed.loc[nearest_idx, 'product_id']
            query_tile, _ = download_and_process_image(pid, df_source=df_embed, verbose=False)
        except Exception as e:
            print(f"Error fetching nearest MajorTOM image: {e}")
        if query_tile is None:
            query_tile = get_placeholder_image(f"Query Location\n({lat}, {lon})")
        timings['Download'] = time.time() - t0

        # 5. Visualize - keep geo_dist_map visible
        yield gr.update(visible=False), None, "Encoding location... ✓\nRetrieving similar images... ✓\nGenerating distribution map... ✓\nDownloading images... ✓\nGenerating visualizations...", None, None, df_filtered, gr.update(value=geo_dist_map, visible=True)
        t0 = time.time()
        fig_results = plot_top5_overview(query_tile, results, query_info=f"Loc: {lat},{lon}")
        gallery_items = format_results_for_gallery(results)
        timings['Visualization'] = time.time() - t0

        # 6. Generate Final Status
        timing_str = f"Encoding {timings['Encoding']:.1f}s, Retrieval {timings['Retrieval']:.1f}s, Download {timings['Download']:.1f}s, Visualization {timings['Visualization']:.1f}s\n\n"
        status_msg = timing_str + generate_status_msg(len(filtered_indices), threshold/100.0, results)

        all_results = get_all_results_metadata(model, filtered_indices, probs)
        results_txt = format_results_to_text(all_results)

        yield gr.update(visible=False), gallery_items, status_msg, fig_results, [geo_dist_map, fig_results, results_txt], df_filtered, gr.update(value=geo_dist_map, visible=True)

    except Exception as e:
        import traceback
        traceback.print_exc()
        yield None, None, f"Error: {str(e)}", None, None, None, None
388
+
389
def generate_status_msg(count, threshold, results):
    """Build the status text: match count in ‰ plus the top-3 result summaries."""
    lines = [f"Found {count} matches in top {threshold*100:.0f}‰.\n\nTop {len(results)} similar images:\n"]
    # Summarise at most the first three hits.
    for rank, res in enumerate(results[:3], start=1):
        lines.append(f"{rank}. Product ID: {res['id']}, Location: ({res['lat']:.4f}, {res['lon']:.4f}), Score: {res['score']:.4f}\n")
    return "".join(lines)
394
+
395
def get_initial_plot():
    """Render the initial global map (FarSLIP preferred, SigLIP as fallback)."""
    img, df_vis = None, None
    for name in ('FarSLIP', 'SigLIP'):
        if name in models and models[name].df_embed is not None:
            img, df_vis = plot_global_map_static(models[name].df_embed)
            break
    # Show the static map and hide the interactive plot on first load.
    return gr.update(value=img, visible=True), [img], df_vis, gr.update(visible=False)
405
+
406
def handle_map_click(evt: gr.SelectData, df_vis):
    """Convert a pixel click on the static world map into lat/lon coordinates.

    The static map is a fixed-size matplotlib render; the hard-coded pixel
    dimensions and margins below must match the figure produced by
    plot_global_map_static — TODO confirm they stay in sync if that figure
    changes.

    Args:
        evt: Gradio select event; evt.index holds the (x, y) pixel position.
        df_vis: DataFrame of plotted points with 'centre_lat'/'centre_lon'/
            'product_id', used to snap the click to the nearest catalogued
            tile (None disables snapping).

    Returns:
        (lat, lon, product_id, status_message); the first three are None when
        the click is invalid or falls outside the drawn map area.
    """
    if evt is None:
        return None, None, None, "No point selected."

    try:
        x, y = evt.index[0], evt.index[1]

        # Image dimensions (New) — full rendered image size in pixels.
        img_width = 4000
        img_height = 2000

        # Scaled Margins (Proportional to 4000x2000)
        left_margin = 110
        right_margin = 110
        top_margin = 100
        bottom_margin = 67

        plot_width = img_width - left_margin - right_margin
        plot_height = img_height - top_margin - bottom_margin

        # Adjust for aspect ratio preservation: the equirectangular world map
        # (2:1) is letterboxed inside the axes area.
        map_aspect = 360.0 / 180.0  # 2.0
        plot_aspect = plot_width / plot_height

        if plot_aspect > map_aspect:
            actual_map_width = plot_height * map_aspect
            actual_map_height = plot_height
            h_offset = (plot_width - actual_map_width) / 2
            v_offset = 0
        else:
            actual_map_width = plot_width
            actual_map_height = plot_width / map_aspect
            h_offset = 0
            v_offset = (plot_height - actual_map_height) / 2

        # Calculate relative position within the plot area
        x_in_plot = x - left_margin
        y_in_plot = y - top_margin

        # Check if click is within the actual map bounds
        if (x_in_plot < h_offset or x_in_plot > h_offset + actual_map_width or
            y_in_plot < v_offset or y_in_plot > v_offset + actual_map_height):
            return None, None, None, "Click outside map area. Please click on the map."

        # Calculate relative position within the map (0 to 1)
        x_rel = (x_in_plot - h_offset) / actual_map_width
        y_rel = (y_in_plot - v_offset) / actual_map_height

        # Clamp to [0, 1]
        x_rel = max(0, min(1, x_rel))
        y_rel = max(0, min(1, y_rel))

        # Convert to geographic coordinates (y axis points down, so latitude
        # decreases as y grows).
        lon = x_rel * 360 - 180
        lat = 90 - y_rel * 180

        # Find nearest point in df_vis if available
        pid = ""
        if df_vis is not None:
            dists = (df_vis['centre_lat'] - lat)**2 + (df_vis['centre_lon'] - lon)**2
            min_idx = dists.idxmin()
            nearest_row = df_vis.loc[min_idx]

            # Snap only when within 5 degrees (25 = 5² in squared-degree units).
            if dists[min_idx] < 25:
                lat = nearest_row['centre_lat']
                lon = nearest_row['centre_lon']
                pid = nearest_row['product_id']

    except Exception as e:
        print(f"Error handling click: {e}")
        import traceback
        traceback.print_exc()
        return None, None, None, f"Error: {e}"

    return lat, lon, pid, f"Selected Point: ({lat:.4f}, {lon:.4f})"
481
+
482
def download_image_by_location(lat, lon, pid, model_name):
    """Download and return the MajorTOM tile at the given coordinates.

    Resolves the nearest catalogued product id when *pid* is empty, then
    fetches the tile via download_and_process_image.

    Returns:
        (image, status_message) — image is None on any failure.
    """
    if lat is None or lon is None:
        return None, "Please specify coordinates first."

    model, error = get_active_model(model_name)
    if error:
        return None, error

    try:
        # Normalise coordinates so the status messages format cleanly.
        lat, lon = float(lat), float(lon)

        # No product id supplied: fall back to the nearest catalogued tile.
        if not pid:
            df = model.df_embed
            lat_col = pd.to_numeric(df['centre_lat'], errors='coerce')
            lon_col = pd.to_numeric(df['centre_lon'], errors='coerce')
            sq_dist = (lat_col - lat) ** 2 + (lon_col - lon) ** 2
            pid = df.loc[sq_dist.idxmin(), 'product_id']

        img_384, _ = download_and_process_image(pid, df_source=model.df_embed, verbose=True)

        if img_384 is None:
            return None, f"Failed to download image for location ({lat:.4f}, {lon:.4f})"

        return img_384, f"Downloaded image at ({lat:.4f}, {lon:.4f})"

    except Exception as e:
        import traceback
        traceback.print_exc()
        return None, f"Error: {str(e)}"
517
+
518
def reset_to_global_map():
    """Regenerate the global map view (FarSLIP preferred, SigLIP as fallback)."""
    img, df_vis = None, None
    for name in ('FarSLIP', 'SigLIP'):
        if name in models and models[name].df_embed is not None:
            img, df_vis = plot_global_map_static(models[name].df_embed)
            break

    return gr.update(value=img, visible=True), [img], df_vis
528
+
529
def format_results_to_text(results):
    """Render the retrieval results as a plain-text report for download."""
    if not results:
        return "No results found."

    parts = [f"Top {len(results)} Retrieval Results\n", "=" * 30 + "\n\n"]
    for rank, res in enumerate(results, start=1):
        parts.append(f"Rank: {rank}\n")
        parts.append(f"Product ID: {res['id']}\n")
        parts.append(f"Location: Latitude {res['lat']:.6f}, Longitude {res['lon']:.6f}\n")
        parts.append(f"Similarity Score: {res['score']:.6f}\n")
        parts.append("-" * 30 + "\n")
    return "".join(parts)
542
+
543
def save_plot(figs):
    """Persist the current results to a downloadable temp file.

    Accepts either a single PIL image (saved as .png), a list/tuple of
    [map_image, results_image, results_text] (bundled into a .zip), or a
    Plotly-like figure exposing ``write_html`` (saved as .html).

    Returns:
        Path of the written temp file, or None on failure/empty input.
    """
    if figs is None:
        return None
    try:
        from io import BytesIO  # local: only needed when assembling the zip

        # Single image (initial state): save directly as PNG.
        if isinstance(figs, PILImage.Image):
            fd, path = tempfile.mkstemp(suffix='.png', prefix='earth_explorer_map_')
            os.close(fd)
            figs.save(path)
            return path

        # List/tuple of [map_img, results_img, results_txt]
        if isinstance(figs, (list, tuple)):
            # A lone image in a list is still just a PNG download.
            if len(figs) == 1 and isinstance(figs[0], PILImage.Image):
                fd, path = tempfile.mkstemp(suffix='.png', prefix='earth_explorer_map_')
                os.close(fd)
                figs[0].save(path)
                return path

            fd, zip_path = tempfile.mkstemp(suffix='.zip', prefix='earth_explorer_results_')
            os.close(fd)

            with zipfile.ZipFile(zip_path, 'w') as zipf:
                # Write images straight into the archive via in-memory
                # buffers. (Previously fixed-name files in gettempdir() were
                # used, which collide between concurrent requests and leak.)
                if figs[0] is not None:
                    buf = BytesIO()
                    figs[0].save(buf, format='PNG')
                    zipf.writestr('map_distribution.png', buf.getvalue())

                if len(figs) > 1 and figs[1] is not None:
                    buf = BytesIO()
                    figs[1].save(buf, format='PNG')
                    zipf.writestr('retrieval_results.png', buf.getvalue())

                if len(figs) > 2 and figs[2] is not None:
                    # writestr encodes str payloads as UTF-8.
                    zipf.writestr('results.txt', figs[2])

            return zip_path

        # Fallback: assume a Plotly figure and export interactive HTML.
        fd, path = tempfile.mkstemp(suffix='.html', prefix='earth_explorer_plot_')
        os.close(fd)
        figs.write_html(path)
        return path
    except Exception as e:
        print(f"Error saving: {e}")
        return None
599
+
600
# Gradio Blocks Interface.
# Layout: left column holds the three search tabs plus shared controls;
# right column holds the map, the results figure and a zoomable gallery.
with gr.Blocks(title="EarthEmbeddingExplorer") as demo:
    gr.Markdown("# EarthEmbeddingExplorer")
    # Fixed typo in the intro copy: "a simple a click" -> "a simple click".
    gr.HTML("""
    <div style="font-size: 1.2em;">
    EarthEmbeddingExplorer is a tool that allows you to search for satellite images of the Earth using natural language descriptions, images, geolocations, or a simple click on the map. For example, you can type "tropical rainforest" or "coastline with a city," and the system will find locations on Earth that match your description. It then visualizes these locations on a world map and displays the top matching images.
    </div>

    <div style="display: flex; gap: 0.2em; align-items: center; justify-content: center;">
    <a href="https://www.modelscope.cn/studios/VoyagerX/EarthExplorer"><img src="https://img.shields.io/badge/Open in ModelScope.cn-xGPU-624aff"></a>
    <a href="https://www.modelscope.ai/studios/VoyagerX/EarthExplorer"><img src="https://img.shields.io/badge/Open in ModelScope.ai-CPU-624aff"></a>
    <a href="https://huggingface.co/spaces/ML4Sustain/EarthExplorer"><img src="https://img.shields.io/badge/Open in HF Space-CPU-FFD21E"></a>
    <a href="https://modelscope.cn/studios/VoyagerX/EarthExplorer/file/view/master/Tutorial.md?status=1"> <img src="https://img.shields.io/badge/Tutorial-📖-007bff"> </a>
    <a href="https://www.modelscope.cn/learn/3958"> <img src="https://img.shields.io/badge/中文教程-📖-007bff"> </a>
    </div>

    """)

    with gr.Row():
        # Left column: the three query modes plus shared controls.
        with gr.Column(scale=4):
            with gr.Tabs():
                with gr.TabItem("Text Search") as tab_text:
                    model_selector_text = gr.Dropdown(choices=["SigLIP", "FarSLIP"], value="FarSLIP", label="Model")
                    query_input = gr.Textbox(label="Query", placeholder="e.g., rainforest, glacier")

                    gr.Examples(
                        examples=[
                            ["a satellite image of a river around a city"],
                            ["a satellite image of a rainforest"],
                            ["a satellite image of a slum"],
                            ["a satellite image of a glacier"],
                            ["a satellite image of snow covered mountains"]
                        ],
                        inputs=[query_input],
                        label="Text Examples"
                    )

                    search_btn = gr.Button("Search by Text", variant="primary")

                with gr.TabItem("Image Search") as tab_image:
                    model_selector_img = gr.Dropdown(choices=["SigLIP", "FarSLIP", "SatCLIP"], value="FarSLIP", label="Model")

                    gr.Markdown("### Option 1: Upload or Select Image")
                    image_input = gr.Image(type="pil", label="Upload Image")

                    gr.Examples(
                        examples=[
                            ["./examples/example1.png"],
                            ["./examples/example2.png"],
                            ["./examples/example3.png"]
                        ],
                        inputs=[image_input],
                        label="Image Examples"
                    )

                    gr.Markdown("### Option 2: Click Map or Enter Coordinates")
                    btn_reset_map_img = gr.Button("🔄 Reset Map to Global View", variant="secondary", size="sm")

                    with gr.Row():
                        img_lat = gr.Number(label="Latitude", interactive=True)
                        img_lon = gr.Number(label="Longitude", interactive=True)

                    # Hidden field auto-filled by map clicks (handle_map_click).
                    img_pid = gr.Textbox(label="Product ID (auto-filled)", visible=False)
                    img_click_status = gr.Markdown("")

                    btn_download_img = gr.Button("Download Image by Geolocation", variant="secondary")

                    search_img_btn = gr.Button("Search by Image", variant="primary")

                with gr.TabItem("Location Search") as tab_location:
                    gr.Markdown("Search using **SatCLIP** location encoder.")

                    gr.Markdown("### Click Map or Enter Coordinates")
                    btn_reset_map_loc = gr.Button("🔄 Reset Map to Global View", variant="secondary", size="sm")

                    with gr.Row():
                        lat_input = gr.Number(label="Latitude", value=30.0, interactive=True)
                        lon_input = gr.Number(label="Longitude", value=120.0, interactive=True)

                    loc_pid = gr.Textbox(label="Product ID (auto-filled)", visible=False)
                    loc_click_status = gr.Markdown("")

                    gr.Examples(
                        examples=[
                            [30.32, 120.15],
                            [40.7128, -74.0060],
                            [24.65, 46.71],
                            [-3.4653, -62.2159],
                            [64.4, 16.8]
                        ],
                        inputs=[lat_input, lon_input],
                        label="Location Examples"
                    )

                    search_loc_btn = gr.Button("Search by Location", variant="primary")

            # Controls shared by all three search modes.
            threshold_slider = gr.Slider(minimum=1, maximum=30, value=7, step=1, label="Top Percentage (‰)")
            status_output = gr.Textbox(label="Status", lines=10)
            save_btn = gr.Button("Download Result")
            download_file = gr.File(label="Zipped Results", height=40)

        # Right column: map and retrieval visualisations.
        with gr.Column(scale=6):
            plot_map = gr.Image(
                label="Geographical Distribution",
                type="pil",
                interactive=False,
                height=400,
                width=800,
                visible=True
            )
            plot_map_interactive = gr.Plot(
                label="Geographical Distribution (Interactive)",
                visible=False
            )
            results_plot = gr.Image(label="Top 5 Matched Images", type="pil")
            gallery_images = gr.Gallery(label="Top Retrieved Images (Zoom)", columns=3, height="auto")

    # Cross-callback state: current downloadable figures and the dataframe
    # backing the plotted map (used by handle_map_click for snapping).
    current_fig = gr.State()
    map_data_state = gr.State()

    # Initial Load
    demo.load(fn=get_initial_plot, outputs=[plot_map, current_fig, map_data_state, plot_map_interactive])

    # Reset Map Buttons
    btn_reset_map_img.click(
        fn=reset_to_global_map,
        outputs=[plot_map, current_fig, map_data_state]
    )

    btn_reset_map_loc.click(
        fn=reset_to_global_map,
        outputs=[plot_map, current_fig, map_data_state]
    )

    # Map Click Event - updates Image Search coordinates
    plot_map.select(
        fn=handle_map_click,
        inputs=[map_data_state],
        outputs=[img_lat, img_lon, img_pid, img_click_status]
    )

    # Map Click Event - also updates Location Search coordinates
    plot_map.select(
        fn=handle_map_click,
        inputs=[map_data_state],
        outputs=[lat_input, lon_input, loc_pid, loc_click_status]
    )

    # Download Image by Geolocation
    btn_download_img.click(
        fn=download_image_by_location,
        inputs=[img_lat, img_lon, img_pid, model_selector_img],
        outputs=[image_input, img_click_status]
    )

    # Search Event (Text)
    search_btn.click(
        fn=search_text,
        inputs=[query_input, threshold_slider, model_selector_text],
        outputs=[plot_map_interactive, gallery_images, status_output, results_plot, current_fig, map_data_state, plot_map]
    )

    # Search Event (Image)
    search_img_btn.click(
        fn=search_image,
        inputs=[image_input, threshold_slider, model_selector_img],
        outputs=[plot_map_interactive, gallery_images, status_output, results_plot, current_fig, map_data_state, plot_map]
    )

    # Search Event (Location)
    search_loc_btn.click(
        fn=search_location,
        inputs=[lat_input, lon_input, threshold_slider],
        outputs=[plot_map_interactive, gallery_images, status_output, results_plot, current_fig, map_data_state, plot_map]
    )

    # Save Event
    save_btn.click(
        fn=save_plot,
        inputs=[current_fig],
        outputs=[download_file]
    )

    # Tab Selection Events: switching tabs always restores the static map view.
    def show_static_map():
        return gr.update(visible=True), gr.update(visible=False)

    tab_text.select(fn=show_static_map, outputs=[plot_map, plot_map_interactive])
    tab_image.select(fn=show_static_map, outputs=[plot_map, plot_map_interactive])
    tab_location.select(fn=show_static_map, outputs=[plot_map, plot_map_interactive])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
configs/huggingface.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ siglip:
2
+ ckpt_path: "hf"
3
+ model_name: "ViT-SO400M-14-SigLIP-384"
4
+ tokenizer_path: "hf"
5
+ embedding_path: "hf://ML4Sustain/EarthEmbeddings/uniform_sample_250k/siglip/SigLIP_grid_sample_center_384x384_243k.parquet"
6
+ farslip:
7
+ ckpt_path: "hf"
8
+ model_name: "ViT-B-16"
9
+ embedding_path: "hf://ML4Sustain/EarthEmbeddings/uniform_sample_250k/farslip/FarSLIP_grid_sample_center_384x384_243k.parquet"
10
+ satclip:
11
+ ckpt_path: "hf"
12
+ embedding_path: "hf://ML4Sustain/EarthEmbeddings/uniform_sample_250k/satclip/SatCLIP_grid_sample_center_384x384_243k.parquet"
countries.geo.json ADDED
The diff for this file is too large to render. See raw diff
 
data_utils.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fsspec
2
+ import pyarrow.parquet as pq
3
+ import numpy as np
4
+ from PIL import Image
5
+ from io import BytesIO
6
+ from rasterio.io import MemoryFile
7
+ import matplotlib.pyplot as plt
8
+ import cartopy.crs as ccrs
9
+ import cartopy.io.img_tiles as cimgt
10
+ from matplotlib.patches import Rectangle
11
+ import math
12
+ from matplotlib.figure import Figure
13
+ from matplotlib.backends.backend_agg import FigureCanvasAgg
14
+
15
+
16
def crop_center(img_array, cropx, cropy):
    """Crop a (H, W, C) array to cropy x cropx around its centre."""
    height, width, _ = img_array.shape
    left = width // 2 - cropx // 2
    top = height // 2 - cropy // 2
    return img_array[top:top + cropy, left:left + cropx]
21
+
22
def read_tif_bytes(tif_bytes):
    """Decode an in-memory GeoTIFF byte string into a squeezed numpy array."""
    with MemoryFile(tif_bytes) as mem, mem.open(driver='GTiff') as src:
        bands = src.read()
    return bands.squeeze()
26
+
27
def read_row_memory(row_dict, columns=None):
    """Fetch selected columns of one remote parquet row group into memory.

    Args:
        row_dict: Mapping with 'parquet_url' (remote parquet file) and
            'parquet_row' (row-group index within that file).
        columns: Column names to read; defaults to ["thumbnail"]. The
            'thumbnail' column is decoded as a PIL image; every other column
            is treated as GeoTIFF bytes and decoded to a numpy array.

    Returns:
        Dict mapping each requested column name to its decoded value.
    """
    # Avoid the shared mutable default argument (was columns=["thumbnail"]).
    if columns is None:
        columns = ["thumbnail"]

    url = row_dict['parquet_url']
    row_idx = row_dict['parquet_row']

    # Readahead caching with large blocks keeps the number of HTTP range
    # requests low when pulling a single row group.
    fs_options = {
        "cache_type": "readahead",
        "block_size": 5 * 1024 * 1024
    }

    with fsspec.open(url, mode='rb', **fs_options) as f:
        with pq.ParquetFile(f) as pf:
            table = pf.read_row_group(row_idx, columns=columns)

    row_output = {}
    for col in columns:
        col_data = table[col][0].as_py()

        if col != 'thumbnail':
            row_output[col] = read_tif_bytes(col_data)
        else:
            # Thumbnails are stored as already-encoded image bytes.
            row_output[col] = Image.open(BytesIO(col_data))

    return row_output
51
+
52
def download_and_process_image(product_id, df_source=None, verbose=True):
    """Fetch a tile by product id and convert it to RGB PIL images.

    Looks up *product_id* in *df_source* (needs 'product_id' plus the
    'parquet_url'/'parquet_row' metadata columns), downloads the B04/B03/B02
    bands, normalises them and returns ``(img_384, img_full)``: a 384x384
    centre crop (or resize when the tile is smaller) and the full-resolution
    image. Returns (None, None) on any failure.
    """
    if df_source is None:
        if verbose: print("❌ Error: No DataFrame provided.")
        return None, None

    row_subset = df_source[df_source['product_id'] == product_id]
    if len(row_subset) == 0:
        if verbose: print(f"❌ Error: Product ID {product_id} not found in DataFrame.")
        return None, None

    row_dict = row_subset.iloc[0].to_dict()

    # Rewrite Hugging Face URLs to the ModelScope mirror (HF may be
    # unreachable from the deployment environment).
    if 'parquet_url' in row_dict:
        url = row_dict['parquet_url']
        if 'huggingface.co' in url:
            row_dict['parquet_url'] = url.replace('https://huggingface.co', 'https://modelscope.cn').replace('resolve/main', 'resolve/master')
        elif 'hf-mirror.com' in url:
            row_dict['parquet_url'] = url.replace('https://hf-mirror.com', 'https://modelscope.cn').replace('resolve/main', 'resolve/master')
    else:
        if verbose: print("❌ Error: 'parquet_url' missing in metadata.")
        return None, None

    if verbose: print(f"⬇️ Fetching data for {product_id} from {row_dict['parquet_url']}...")

    try:
        rgb_bands = ['B04', 'B03', 'B02']  # red, green, blue
        bands_data = read_row_memory(row_dict, columns=rgb_bands)

        if not all(b in bands_data for b in rgb_bands):
            if verbose: print(f"❌ Error: Missing bands in fetched data for {product_id}")
            return None, None

        rgb_img = np.stack([bands_data[b] for b in rgb_bands], axis=-1)

        if verbose:
            print(f"Raw RGB stats: Min={rgb_img.min()}, Max={rgb_img.max()}, Mean={rgb_img.mean()}, Dtype={rgb_img.dtype}")

        # Normalise: assumes raw DN = reflectance * 10000 (TODO confirm for
        # all datasets); the 2.5 gain brightens typical scenes before
        # clipping to [0, 1]. (A dead `if rgb_img.max() <= 255: pass` branch
        # that never handled pre-scaled data was removed here.)
        rgb_norm = (2.5 * (rgb_img.astype(float) / 10000.0)).clip(0, 1)
        rgb_uint8 = (rgb_norm * 255).astype(np.uint8)

        if verbose:
            print(f"Processed RGB stats: Min={rgb_uint8.min()}, Max={rgb_uint8.max()}, Mean={rgb_uint8.mean()}")

        img_full = Image.fromarray(rgb_uint8)

        if rgb_uint8.shape[0] >= 384 and rgb_uint8.shape[1] >= 384:
            img_384 = Image.fromarray(crop_center(rgb_uint8, 384, 384))
        else:
            if verbose: print(f"⚠️ Image too small {rgb_uint8.shape}, resizing to 384x384.")
            img_384 = img_full.resize((384, 384))

        if verbose: print(f"✅ Successfully processed {product_id}")
        return img_384, img_full

    except Exception as e:
        if verbose: print(f"❌ Error processing {product_id}: {e}")
        import traceback
        traceback.print_exc()
        return None, None
116
+
117
# Define Esri Imagery Class: cartopy tile source for Esri's World_Imagery
# basemap (used as a fallback when the real dataset tile cannot be fetched).
class EsriImagery(cimgt.GoogleTiles):
    def _image_url(self, tile):
        # tile is a (x, y, zoom) triple supplied by cartopy's tiler.
        x, y, z = tile
        return f'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}'
122
+
123
+ from PIL import Image, ImageDraw, ImageFont
124
+
125
def get_placeholder_image(text="Image Unavailable", size=(384, 384)):
    """Return a grey placeholder image with *text* drawn on it.

    Used wherever a real satellite tile or basemap could not be fetched.

    Args:
        text: Message to render (may contain newlines).
        size: (width, height) of the placeholder in pixels.
    """
    img = Image.new('RGB', size, color=(200, 200, 200))
    d = ImageDraw.Draw(img)
    try:
        # PIL's bundled bitmap font; no external font files required.
        font = ImageFont.load_default()
    except Exception:  # was a bare except: don't mask KeyboardInterrupt/SystemExit
        font = None

    # Draw text in center (rough approximation)
    # For better centering we would need font metrics, but simple is fine here
    d.text((20, size[1]//2), text, fill=(0, 0, 0), font=font)
    return img
138
+
139
def get_esri_satellite_image(lat, lon, score=None, rank=None, query=None):
    """
    Generates a satellite image visualization using Esri World Imagery via Cartopy.
    Matches the style of the provided notebook.
    Uses OO Matplotlib API for thread safety.

    Args:
        lat, lon: Centre of the view in degrees (WGS84).
        score, rank, query: Optional annotations rendered into the title.

    Returns:
        A PIL image of the rendered map, or a grey placeholder on failure.
    """
    try:
        imagery = EsriImagery()

        # Create figure using OO API (the pyplot state machine is not
        # safe to use from worker threads).
        fig = Figure(figsize=(5, 5), dpi=100)
        canvas = FigureCanvasAgg(fig)
        ax = fig.add_subplot(1, 1, 1, projection=imagery.crs)

        # Set extent to approx 10km x 10km around the point
        extent_deg = 0.05
        ax.set_extent([lon - extent_deg, lon + extent_deg, lat - extent_deg, lat + extent_deg], crs=ccrs.PlateCarree())

        # Add the imagery
        ax.add_image(imagery, 14)

        # Add a marker for the center
        ax.plot(lon, lat, marker='+', color='yellow', markersize=12, markeredgewidth=2, transform=ccrs.PlateCarree())

        # Add Bounding Box (3840m x 3840m)
        box_size_m = 384 * 10  # 3840m

        # Convert meters to degrees (approx)
        # 1 deg lat = 111320m
        # 1 deg lon = 111320m * cos(lat)
        # NOTE(review): cos(lat) -> 0 near the poles makes dlon blow up;
        # assumes inputs stay away from ±90° — TODO confirm.
        dlat = (box_size_m / 111320)
        dlon = (box_size_m / (111320 * math.cos(math.radians(lat))))

        # Bottom-Left corner
        rect_lon = lon - dlon / 2
        rect_lat = lat - dlat / 2

        # Add Rectangle
        rect = Rectangle((rect_lon, rect_lat), dlon, dlat,
                         linewidth=2, edgecolor='red', facecolor='none', transform=ccrs.PlateCarree())
        ax.add_patch(rect)

        # Title
        title_parts = []
        if query: title_parts.append(f"{query}")
        if rank is not None: title_parts.append(f"Rank {rank}")
        if score is not None: title_parts.append(f"Score: {score:.4f}")

        ax.set_title("\n".join(title_parts), fontsize=10)

        # Save to buffer
        buf = BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight')
        buf.seek(0)

        return Image.open(buf)

    except Exception as e:
        # Suppress full traceback for network errors to avoid log spam
        error_msg = str(e)
        if "Connection reset by peer" in error_msg or "Network is unreachable" in error_msg or "urlopen error" in error_msg:
            print(f"⚠️ Network warning: Could not fetch Esri satellite map for ({lat:.4f}, {lon:.4f}). Server might be offline.")
        else:
            print(f"Error generating Esri image for {lat}, {lon}: {e}")
            # Only print traceback for non-network errors
            # import traceback
            # traceback.print_exc()

        # Return a placeholder image with text
        return get_placeholder_image(f"Map Unavailable\n({lat:.2f}, {lon:.2f})")
209
+
210
def get_esri_satellite_image_url(lat, lon, zoom=14):
    """Stub: would return the Esri World Imagery tile URL for a location.

    The tile-coordinate math was never implemented; callers rely on
    get_esri_satellite_image() for a rendered static map instead. Kept for
    API compatibility — always returns None, as the original did.
    """
    # NOTE(review): the original body only instantiated EsriImagery inside a
    # bare try/except and fell through; the dead code and the bare except
    # have been removed.
    return None
embedding_datasets/grid_sample_center_22k_FarSLIP_384x384.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3555e0279742daa7ee27ba5587a8234f791966ce4411ef804455ee03af52e1aa
3
+ size 23547770
embedding_datasets/grid_sample_center_22k_SatCLIP_384x384.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76484097dea1f0fc65e4f2c8d3e825ec3ccda8914da83e3a65aabd86a4f59ec2
3
+ size 25158503
embedding_datasets/grid_sample_center_22k_SigLIP_384x384.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a34d949f704f8f4d9d963f28dbe547c341591645cf86d191587a3cc0a866855f
3
+ size 50178408
embedding_datasets/grid_sample_metadata.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:808fde21fdae5ef2dc8183c7e8017b286dc2d2419ed64e6058358291cbeef06c
3
+ size 1999889
embedding_datasets/zhejiang_sample_center_2k_FarSLIP_384x384.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4bc51828dd58d45c62d3168557870e8db6659c0c52e9661865326cafb11c88b
3
+ size 2088911
embedding_datasets/zhejiang_sample_center_2k_SatCLIP_384x384.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81bdf991b1d6a100108d0cad730bf79b1e9f558f261f2d4fb18c3f68c9ff2796
3
+ size 2719357
embedding_datasets/zhejiang_sample_center_2k_SigLIP_384x384.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47ab37984d86b9b15949448f36d52022c24c40e7ff3bc65f44510ff08d0cbe81
3
+ size 4381379
embedding_datasets/zhejiang_sample_metadata.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7c46e7985c05cb010e4fd308489865271b43b250844b8d42a3fe8263d01a78
3
+ size 159438
examples/example1.png ADDED

Git LFS Details

  • SHA256: 07dd836c4dfe700657f163afdae9ebf2685f83dca1417078b3147c8c31f598a9
  • Pointer size: 131 Bytes
  • Size of remote file: 225 kB
examples/example2.png ADDED

Git LFS Details

  • SHA256: e52a44517c028cb6b9828c37c974991fb20122f6cdba951e809ac66b7c591552
  • Pointer size: 132 Bytes
  • Size of remote file: 1.27 MB
examples/example3.png ADDED

Git LFS Details

  • SHA256: d63b587c17943eb1e60f511def466696c1a12a323f0f67dff99da7631e2e48aa
  • Pointer size: 131 Bytes
  • Size of remote file: 507 kB
images/CLIP.png ADDED

Git LFS Details

  • SHA256: b6005b0baf8fa09c54d102f75f437a1f3445f5f2fa512ac78513b77809e83363
  • Pointer size: 131 Bytes
  • Size of remote file: 192 kB
images/Image_Search_Amazon.jpg ADDED

Git LFS Details

  • SHA256: d3dda8f0b849b76048cd8f2013232a31cd3d30af248eb86bee0fe724513e4f58
  • Pointer size: 131 Bytes
  • Size of remote file: 791 kB
images/Image_Search_Middle_East.jpg ADDED

Git LFS Details

  • SHA256: 7ac5769228c0869bf43cc4c9a2a202159937c44f8b872ec15b1b9c93159b414e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.22 MB
images/Location_Search_Amazon.jpg ADDED

Git LFS Details

  • SHA256: 8614bae60fd581e464effe269d9dd5fb74c389136716219f8c0db72ad3560f53
  • Pointer size: 131 Bytes
  • Size of remote file: 905 kB
images/Location_Search_Hangzhou.jpg ADDED

Git LFS Details

  • SHA256: edd2aeb10fff28156716a99a1fa354bc461a6dad13ea4fcfcf768e539549251f
  • Pointer size: 131 Bytes
  • Size of remote file: 963 kB
images/Text_Search.jpg ADDED

Git LFS Details

  • SHA256: fd8b3ae6ba248fb713d06f93d14560814cd925b35edad9227711cf2e4933a901
  • Pointer size: 131 Bytes
  • Size of remote file: 942 kB
images/embedding.png ADDED

Git LFS Details

  • SHA256: e64060fe753c5f322b75645ddb1ff5380a41f5581e62136f3b67befda77abcd0
  • Pointer size: 130 Bytes
  • Size of remote file: 91.4 kB
images/framework_en.png ADDED

Git LFS Details

  • SHA256: 7b92ac54e73b446641c84d004b71a013ed85d08d308978880e76928d654bd89e
  • Pointer size: 131 Bytes
  • Size of remote file: 443 kB
images/framework_zh.png ADDED

Git LFS Details

  • SHA256: 32c7e6756a68db25e1a6d30c8567da9a5fedf6f6600d507b26b3f98e5eb34a86
  • Pointer size: 131 Bytes
  • Size of remote file: 535 kB
images/samples.png ADDED

Git LFS Details

  • SHA256: 122e7e4c21b01fc14325ce794d5286c0e1abbd6ae3c42cf102907c7e209df65e
  • Pointer size: 132 Bytes
  • Size of remote file: 2.78 MB
models/FarSLIP/.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ **/logs/
2
+ **/wandb/
3
+ models/
4
+ features/
5
+ results/
6
+ src/open_clip_train/config.py
7
+ src/open_clip_train/output_samples/
8
+ **/results_retrieval/
9
+ **/results_classification/
10
+ checkpoints/
11
+
12
+ tests/data/
13
+ *.pt
14
+
15
+ # Byte-compiled / optimized / DLL files
16
+ __pycache__/
17
+ *.py[cod]
18
+ *$py.class
19
+
20
+ # C extensions
21
+ *.so
22
+
23
+ # Distribution / packaging
24
+ .Python
25
+ build/
26
+ develop-eggs/
27
+ dist/
28
+ downloads/
29
+ eggs/
30
+ .eggs/
31
+ lib/
32
+ lib64/
33
+ parts/
34
+ sdist/
35
+ var/
36
+ wheels/
37
+ pip-wheel-metadata/
38
+ share/python-wheels/
39
+ *.egg-info/
40
+ .installed.cfg
41
+ *.egg
42
+ MANIFEST
43
+
44
+ # PyInstaller
45
+ # Usually these files are written by a python script from a template
46
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
47
+ *.manifest
48
+ *.spec
49
+
50
+ # Installer logs
51
+ pip-log.txt
52
+ pip-delete-this-directory.txt
53
+
54
+ # Unit test / coverage reports
55
+ htmlcov/
56
+ .tox/
57
+ .nox/
58
+ .coverage
59
+ .coverage.*
60
+ .cache
61
+ nosetests.xml
62
+ coverage.xml
63
+ *.cover
64
+ *.py,cover
65
+ .hypothesis/
66
+ .pytest_cache/
67
+
68
+ # Translations
69
+ *.mo
70
+ *.pot
71
+
72
+ # Django stuff:
73
+ *.log
74
+ local_settings.py
75
+ db.sqlite3
76
+ db.sqlite3-journal
77
+
78
+ # Flask stuff:
79
+ instance/
80
+ .webassets-cache
81
+
82
+ # Scrapy stuff:
83
+ .scrapy
84
+
85
+ # Sphinx documentation
86
+ docs/_build/
87
+
88
+ # PyBuilder
89
+ target/
90
+
91
+ # Jupyter Notebook
92
+ .ipynb_checkpoints
93
+
94
+ # IPython
95
+ profile_default/
96
+ ipython_config.py
97
+
98
+ # pyenv
99
+ .python-version
100
+
101
+ # pipenv
102
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
103
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
104
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
105
+ # install all needed dependencies.
106
+ #Pipfile.lock
107
+
108
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
109
+ __pypackages__/
110
+
111
+ # Celery stuff
112
+ celerybeat-schedule
113
+ celerybeat.pid
114
+
115
+ # SageMath parsed files
116
+ *.sage.py
117
+
118
+ # Environments
119
+ .env
120
+ .venv
121
+ env/
122
+ venv/
123
+ ENV/
124
+ env.bak/
125
+ venv.bak/
126
+
127
+ # Spyder project settings
128
+ .spyderproject
129
+ .spyproject
130
+
131
+ # Rope project settings
132
+ .ropeproject
133
+
134
+ # mkdocs documentation
135
+ /site
136
+
137
+ # mypy
138
+ .mypy_cache/
139
+ .dmypy.json
140
+ dmypy.json
141
+
142
+ # Pyre type checker
143
+ .pyre/
144
+ sync.sh
145
+ gpu1sync.sh
146
+ .idea
147
+ *.pdf
148
+ **/._*
149
+ **/*DS_*
150
+ **.jsonl
151
+ src/sbatch
152
+ src/misc
153
+ .vscode
154
+ src/debug
155
+ core.*
156
+
157
+ *.out
158
+
159
+ # Allow
160
+ !src/evaluation/misc/results_dbs/*