Spaces:
Running
on
L4
Running
on
L4
File size: 5,684 Bytes
8cc8f31 552b1cf 8cc8f31 d54265f 8cc8f31 d54265f 8cc8f31 552b1cf 6eb83e3 8cc8f31 d54265f 552b1cf 6eb83e3 552b1cf 6eb83e3 552b1cf 8d89494 75bf345 8d89494 552b1cf 8cc8f31 8d89494 6eb83e3 8cc8f31 d4b7ad4 8cc8f31 8d89494 8cc8f31 552b1cf 8d89494 8cc8f31 4a51190 c60e230 8d89494 8cc8f31 7605761 8cc8f31 c9fce28 552b1cf d9c6521 23fa64c 552b1cf c9fce28 8cc8f31 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
from fsspec.parquet import open_parquet_file
import fsspec
import pyarrow.parquet as pq
from .grid import *
import pandas as pd
from io import BytesIO
import os
from PIL import Image
import datetime
# GLOBAL VARIABLES
# Every metadata table is taken from a local copy under helpers/ when one
# exists; otherwise it is read straight from the dataset's Hugging Face repo.
# NOTE: DATASET_NAME is rebound by each remote fallback, so after this section
# it holds the name of the last dataset that had no local copy (if any).

# Sentinel-2 L2A
if not os.path.isfile('helpers/s2l2a_metadata.parquet'):
    DATASET_NAME = 'Major-TOM/Core-S2L2A'
    l2a_meta_path = f'https://huggingface.co/datasets/{DATASET_NAME}/resolve/main/metadata.parquet'
else:
    l2a_meta_path = 'helpers/s2l2a_metadata.parquet'

# Sentinel-2 L1C
if not os.path.isfile('helpers/s2l1c_metadata.parquet'):
    DATASET_NAME = 'Major-TOM/Core-S2L1C'
    l1c_meta_path = f'https://huggingface.co/datasets/{DATASET_NAME}/resolve/main/metadata.parquet'
else:
    l1c_meta_path = 'helpers/s2l1c_metadata.parquet'

# Sentinel-1 RTC
if not os.path.isfile('helpers/s1rtc_metadata.parquet'):
    DATASET_NAME = 'Major-TOM/Core-S1RTC'
    rtc_meta_path = f'https://huggingface.co/datasets/{DATASET_NAME}/resolve/main/metadata.parquet'
else:
    rtc_meta_path = 'helpers/s1rtc_metadata.parquet'

# COP-DEM
if not os.path.isfile('helpers/dem_metadata.parquet'):
    DATASET_NAME = 'Major-TOM/Core-DEM'
    dem_meta_path = f'https://huggingface.co/datasets/{DATASET_NAME}/resolve/main/metadata.parquet'
else:
    dem_meta_path = 'helpers/dem_metadata.parquet'
# Grid object used further down to translate lat/lon into row/col cell names.
# NOTE(review): the first argument is presumably the grid spacing — confirm
# against the Grid class in .grid.
grid = Grid(
    10,
    latitude_range=(-90, 90),
    longitude_range=(-180, 180),
)

# All four metadata tables are loaded eagerly when the module is imported.
l2a_df = pd.read_parquet(l2a_meta_path)
l1c_df = pd.read_parquet(l1c_meta_path)
rtc_df = pd.read_parquet(rtc_meta_path)
dem_df = pd.read_parquet(dem_meta_path)

# Maps the `source` name accepted by the *_to_image functions to its table.
df_dict = {
    'Sentinel-2 L2A': l2a_df,
    'Sentinel-2 L1C': l1c_df,
    'Sentinel-1 RTC': rtc_df,
    'COP-DEM': dem_df,
}
'''
--- SigLIP Search ---
'''
from .TextSearch import *

# Index file and companion metadata table handed to SearchSigLIP.
# NOTE: BASE_DIR already ends in '/', so the resulting paths contain a double
# slash ('.//siglip_ivfpq.index'); kept as-is.
BASE_DIR = './'
INDEX_OUTPUT = f'{BASE_DIR}/siglip_ivfpq.index'
METADATA_OUTPUT = f'{BASE_DIR}/siglip_ivfpq_metadata.parquet'

# Module-level search object used by text_to_image().
search = SearchSigLIP(
    index_path=INDEX_OUTPUT,
    metadata_path=METADATA_OUTPUT,
)

'''
--- Helper Functions ---
'''
def pretty_date(input):
    """Reformat a compact timestamp string as ``HH:MM:SS - DD Mon YYYY``.

    Two layouts are understood: ``YYYYmmddTHHMMSS`` (chosen when the string
    contains a ``T``) and ``YYYYmmddHHMMSS`` (otherwise).

    Raises:
        ValueError: if the string does not match the selected layout.
    """
    if 'T' in input:
        layout = '%Y%m%dT%H%M%S'
    else:
        layout = '%Y%m%d%H%M%S'
    parsed = datetime.datetime.strptime(input, layout)
    return parsed.strftime('%H:%M:%S - %d %b %Y')
# HELPER FUNCTIONS
def gridcell2ints(grid_string):
    """Convert a grid-cell name such as ``'123U_45R'`` into signed integers.

    The name is split on ``'_'``. In each of the first two parts the last
    character is a direction letter and everything before it is the magnitude.
    The first part is positive when it ends in ``'U'`` and negative otherwise;
    the second is positive when it ends in ``'R'`` and negative otherwise.

    Returns:
        Tuple ``(up, right)`` of signed ints.
    """
    def signed(token, positive_letter):
        # Parse the magnitude first, then apply the sign from the last letter.
        magnitude = int(token[:-1])
        return magnitude if token[-1] == positive_letter else -magnitude

    parts = grid_string.split('_')
    up = signed(parts[0], 'U')  # +ve if up
    right = signed(parts[1], 'R')  # +ve if R
    return up, right
def row2image(parquet_url, parquet_row, fullrow_read=True):
    """Load the thumbnail stored in one row group of a parquet file.

    Args:
        parquet_url: Location of the parquet file, passed to fsspec.
        parquet_row: Index of the row group to read.
        fullrow_read: When True the file is opened with ``fsspec.open``;
            when False it is opened with ``open_parquet_file`` restricted
            to the ``thumbnail`` column.

    Returns:
        A PIL image decoded from the first ``thumbnail`` value of the row
        group.
    """
    if fullrow_read:
        # option 1: plain file handle over the whole parquet file
        opener = fsspec.open(parquet_url)
    else:
        # option 2: handle limited to the thumbnail column
        opener = open_parquet_file(parquet_url, columns=["thumbnail"])

    # Open the handle in a `with` so it is always released; ParquetFile does
    # not close a file object it was handed, so the handle used to leak.
    with opener as source:
        with pq.ParquetFile(source) as pf:
            row_group = pf.read_row_group(parquet_row, columns=['thumbnail'])

    # The thumbnail bytes are copied into memory, so the image remains usable
    # after the remote handle has been closed.
    stream = BytesIO(row_group['thumbnail'][0].as_py())
    return Image.open(stream)
def row2s2(parquet_url, parquet_row, s2_bands = ["B04", "B03", "B02"]):
    """Read the requested band columns from one row group of a parquet file.

    Args:
        parquet_url: Location of the parquet file.
        parquet_row: Index of the row group to read.
        s2_bands: Column names to fetch. The default list is shared between
            calls but is never mutated by this function.

    Returns:
        The value returned by ``ParquetFile.read_row_group`` for those columns.
    """
    with open_parquet_file(parquet_url, columns=s2_bands) as remote_file, \
            pq.ParquetFile(remote_file) as parquet:
        return parquet.read_row_group(parquet_row, columns=s2_bands)
def cell2row(grid_string, meta_df, return_row = False):
    """Find the parquet location of a grid cell in a metadata table.

    Args:
        grid_string: Grid-cell name understood by ``gridcell2ints``.
        meta_df: Metadata DataFrame queried on ``grid_row_u`` and
            ``grid_col_r``; ``parquet_url`` and ``parquet_row`` are read
            from the match.
        return_row: When True the matching DataFrame slice is returned too.

    Returns:
        ``(parquet_url, parquet_row)``, or ``(parquet_url, parquet_row, match)``
        when ``return_row`` is set; ``None`` when no row matches.
        NOTE(review): ``.item()`` requires exactly one matching row — confirm
        that the metadata holds at most one entry per cell.
    """
    row_U, col_R = gridcell2ints(grid_string)
    matches = meta_df.query(f'grid_row_u == {row_U} & grid_col_r == {col_R}')
    if matches.empty:
        return None

    location = (matches.parquet_url.item(), matches.parquet_row.item())
    if return_row:
        return location + (matches,)
    return location
def map_to_image(map, return_centre=False, return_gridcell=False, return_timestamp=False, source='Sentinel-2 L2A'):
    """Return the image of the grid cell under the centre of a map view.

    Args:
        map: Object exposing ``get_bbox()``; the centre is the midpoint of
            bbox indices 1/3 and 0/2.
        return_centre, return_gridcell, return_timestamp, source: Forwarded
            unchanged to ``cell_to_image``.

    Returns:
        Whatever ``cell_to_image`` returns, or ``None`` when the bounding box
        cannot be obtained from ``map``.
    """
    try:
        # 1. get bounds
        bbox = map.get_bbox()
        center = [(bbox[3] + bbox[1]) / 2, (bbox[2] + bbox[0]) / 2]
    except Exception:
        # Best effort: an unusable map yields no image. Only Exception is
        # caught so KeyboardInterrupt/SystemExit still propagate.
        return None

    # 2. translate coordinate to major-tom tile
    rows, cols = grid.latlon2rowcol([center[0]], [center[1]])

    return cell_to_image(cell="{}_{}".format(rows[0], cols[0]),
                         return_centre=return_centre,
                         return_gridcell=return_gridcell,
                         return_timestamp=return_timestamp,
                         source=source)
def text_to_image(text, return_centre=False, return_gridcell=False, return_timestamp=False, source='Sentinel-2 L2A'):
    """Return the image of the grid cell found by a text query.

    The query is passed to ``search.faiss`` and the ``grid_cell`` of the first
    result (presumably the top-ranked hit — confirm in SearchSigLIP) is
    rendered through ``cell_to_image``. The remaining arguments are forwarded
    unchanged.
    """
    first_hit = search.faiss(text)[0]
    return cell_to_image(
        cell=first_hit["grid_cell"],
        return_centre=return_centre,
        return_gridcell=return_gridcell,
        return_timestamp=return_timestamp,
        source=source,
    )
def cell_to_image(cell, return_centre=False, return_gridcell=False, return_timestamp=False, source='Sentinel-2 L2A'):
    """Fetch the thumbnail of a grid cell from the selected source.

    Args:
        cell: Grid-cell name understood by ``cell2row``.
        return_centre: Append ``(centre_lat, centre_lon)`` to the result.
        return_gridcell: Append the ``grid_cell`` value of the metadata row.
        return_timestamp: Append the formatted ``timestamp`` of the metadata
            row, or ``'Not Available'`` when the table has no such column.
        source: Key into ``df_dict`` selecting the metadata table.

    Returns:
        A list whose first element is the thumbnail resized to 1068x1068,
        followed by the optional extras in the order above; ``None`` when the
        cell is not in the table or the thumbnail cannot be loaded.

    Raises:
        KeyError: if ``source`` is not a key of ``df_dict``.
    """
    # 3. translate major-tom cell to row in parquet
    df = df_dict[source]
    row = cell2row(cell, df, return_row=True)
    if row is None:
        return None
    parquet_url, parquet_row, meta_row = row

    # 4. acquire image
    try:
        img = row2image(parquet_url, parquet_row)
    except Exception:
        # Best effort: a failed download/decode yields no image. Only
        # Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        return None

    lat, lon = meta_row.centre_lat.item(), meta_row.centre_lon.item()

    ret = [img.resize((1068, 1068))]
    if return_centre:
        ret.append((lat, lon))
    if return_gridcell:
        ret.append(meta_row.grid_cell.item())
    if return_timestamp:
        if 'timestamp' in meta_row.columns:
            ret.append(pretty_date(meta_row.timestamp.item()))
        else:
            ret.append('Not Available')
    return ret