rodolphethinks1 committed on
Commit
fca871f
·
verified ·
1 Parent(s): 03489c8

Create utils/stac_client.py

Browse files
Files changed (1) hide show
  1. src/utils/stac_client.py +199 -0
src/utils/stac_client.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ from datetime import datetime, timedelta
3
+
4
+ import random
5
+ import requests
6
+ from pystac_client import Client
7
+ import os
8
+
9
+ from src.auth.auth import get_direct_access_token
10
+ from src.utils.image import extract_url_after_filename
11
+
12
class ProductDownloader:
    """
    Download products from the Copernicus Data Space Ecosystem through S3.

    Products can be fetched either as in-memory content (bytes) or as files
    saved to disk. Both download methods accept a bare S3 key or a full
    ``s3://bucket/key`` URI.
    """

    def __init__(self, s3_client, bucket_name='eodata'):
        """
        Initialize the product downloader with an S3 client.

        Args:
            s3_client: The boto3 S3 client to use for downloads
            bucket_name (str): The S3 bucket name where products are stored (default: 'eodata')
        """
        self.s3_client = s3_client
        self.bucket_name = bucket_name

    @staticmethod
    def get_s3_key_from_href(href):
        """
        Return the object key contained in an ``s3://bucket/key`` URI.

        Args:
            href (str): A full S3 URI or an already-bare S3 key

        Returns:
            str: Everything after the bucket component of the URI. A value
                that does not start with ``s3://`` is returned unchanged.
        """
        scheme = 's3://'
        if not href.startswith(scheme):
            return href
        # The first path component after the scheme is the bucket; the
        # remainder is the object key. The downloads always target
        # ``self.bucket_name``, so the bucket part of the URI is discarded.
        _, _, key = href[len(scheme):].partition('/')
        return key

    def get_product_content(self, product_path):
        """
        Download a Sentinel-2 product directly from Copernicus Data Space Ecosystem as a bytes object.

        Args:
            product_path (str): S3 key or full S3 URI to the product

        Returns:
            bytes: The product content as bytes
            str: The filename of the product

        Raises:
            Exception: Any error raised by the S3 client is printed and re-raised.
        """
        # Extract S3 key if full URI is provided
        product_path = self.get_s3_key_from_href(product_path)

        # Extract the filename from the path
        filename = os.path.basename(product_path)

        # Download the file into an in-memory buffer
        buffer = io.BytesIO()
        try:
            self.s3_client.download_fileobj(self.bucket_name, product_path, buffer)
        except Exception as e:
            print(f"Error downloading product: {str(e)}")
            raise

        print(f"Successfully downloaded product: {filename}")

        # getvalue() returns the whole buffer regardless of the stream position
        return buffer.getvalue(), filename

    async def download_product(self, product_path, output_filename=None):
        """
        Download a Sentinel-2 product directly from Copernicus Data Space Ecosystem to disk.

        NOTE: this is a coroutine for interface compatibility, but the S3
        transfer itself is a blocking call made inside it.

        Args:
            product_path (str): S3 key or full S3 URI to the product
            output_filename (str, optional): Filename to save the product to.
                If None, uses the original filename.

        Returns:
            str: Path to the downloaded file

        Raises:
            Exception: Any error raised by the S3 client is printed and re-raised.
        """
        # Extract S3 key if full URI is provided
        product_path = self.get_s3_key_from_href(product_path)

        # Extract the filename from the path
        filename = os.path.basename(product_path)

        # Use custom filename if provided, otherwise use the original
        if output_filename is None:
            output_filename = filename

        # Download the file using the client
        try:
            self.s3_client.download_file(self.bucket_name, product_path, output_filename)
        except Exception as e:
            print(f"Error downloading product: {str(e)}")
            raise

        print(f"Successfully downloaded product {filename} to {output_filename}")
        return output_filename
102
+
103
+ from src.auth.auth import get_direct_access_token
104
+ from src.utils.image import extract_url_after_filename
105
+
106
def download_sentinel_image(username, password, start_date, end_date,
                            bbox=None, limit=10):
    """
    Download a random Sentinel-2 true-colour (TCI, 60 m) image matching the criteria.

    A random one-day window is picked inside the requested date range, the
    STAC catalogue is searched for that window, and one of the returned
    items is chosen at random and downloaded.

    Args:
        username (str): DESTINE username
        password (str): DESTINE password
        start_date (str): Start of the date range, formatted "YYYY-MM-DD"
        end_date (str): End of the date range, formatted "YYYY-MM-DD"
        bbox (list, optional): Bounding box coordinates [west, south, east, north].
            Defaults to the whole globe, [-180, -90, 180, 90].
        limit (int): Maximum number of results to return

    Returns:
        tuple: (image_content or error_message, metadata). On success the
            first element is the downloaded bytes and metadata is a dict with
            "id", "datetime", "bbox" and "filename". On failure the first
            element is an error string and metadata is None.

    Raises:
        ValueError: If start_date or end_date is not in "YYYY-MM-DD" format.
    """
    if bbox is None:
        bbox = [-180, -90, 180, 90]

    # Validate the date range first so bad input fails before any network call.
    range_start = datetime.strptime(start_date, "%Y-%m-%d")
    range_end = datetime.strptime(end_date, "%Y-%m-%d")
    days_between = (range_end - range_start).days
    if days_between < 0:
        return "End date must not be before start date", None

    # Get access token
    token_result = get_direct_access_token(username=username, password=password)
    if not token_result:
        return "Failed to authenticate", None

    access_token = token_result["access_token"]

    # Set up STAC API client
    stac_base_url = "https://cachea.destine.eu"
    catalog = Client.open(f"{stac_base_url}/stac/api")

    # Pick a random one-day window. The start offset keeps a 7-day margin
    # before the end of the range; for ranges shorter than 7 days the offset
    # is clamped to 0 (randint would otherwise raise on an empty interval),
    # so the window simply starts on start_date.
    latest_offset = max(days_between - 7, 0)
    window_start = range_start + timedelta(days=random.randint(0, latest_offset))
    window_end = window_start + timedelta(days=1)

    # Format dates for the API
    start_date_str = window_start.strftime("%Y-%m-%d")
    end_date_str = window_end.strftime("%Y-%m-%d")

    # Search for Sentinel-2 images
    search = catalog.search(
        method="GET",
        collections=["SENTINEL-2"],
        bbox=bbox,
        datetime=f"{start_date_str}/{end_date_str}",
        limit=limit,
    )

    items = list(search.items())
    if not items:
        return "No Sentinel-2 images found", None

    # Select a random item
    random_item = random.choice(items)

    # A STAC item's datetime may be null when it only carries a start/end
    # range, so guard before formatting it.
    acquired = random_item.datetime
    metadata = {
        "id": random_item.id,
        "datetime": acquired.strftime("%Y-%m-%d %H:%M:%S") if acquired is not None else None,
        "bbox": random_item.bbox,
    }

    # Keep only the assets whose href ends with *_TCI_60m.jp2
    tci_assets = [
        asset.href
        for asset in random_item.assets.values()
        if asset.href.endswith('_TCI_60m.jp2')
    ]
    if not tci_assets:
        return "No TCI assets found in the selected image", None

    filepath = extract_url_after_filename(tci_assets[0])
    metadata["filename"] = os.path.basename(filepath)

    # Download the file
    url = f"{stac_base_url}/stac/download?filename={filepath}"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }

    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.post(url, headers=headers, data={}, timeout=60)

    if response.status_code == 200:
        return response.content, metadata
    return f"Failed to download the file. Status code: {response.status_code}", None