rodolphethinks1 committed on
Commit
4ad7bbf
·
verified ·
1 Parent(s): 405087e

Create test.py

Browse files
Files changed (1) hide show
  1. test.py +293 -0
test.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Module for connecting to Copernicus Data Space Ecosystem through S3 and STAC interfaces.
3
+ """
4
+
5
+ import io
6
+ import os
7
+ from urllib.parse import urlparse
8
+
9
+ import boto3
10
+ import pystac_client
11
+ from dotenv import load_dotenv
12
+ from PIL import Image
13
+
14
+ # Load environment variables from .env file
15
+ load_dotenv()
16
+
17
+
18
class S3Connector:
    """
    A client for connecting to S3-compatible storage services.

    This connector builds a boto3 S3 client and a boto3 S3 resource from the
    same endpoint and credentials. Both are created by ``connect()``, which
    the getters call on demand when nothing has been created yet.
    """

    def __init__(self, endpoint_url, access_key_id, secret_access_key, region_name='default'):
        """
        Initialize S3 connector with credentials and endpoint information.

        Parameters
        ----------
        endpoint_url : str
            The URL of the S3 endpoint
        access_key_id : str
            The access key for authentication
        secret_access_key : str
            The secret key for authentication
        region_name : str, optional
            The AWS region name, by default 'default'
        """
        self.endpoint_url = endpoint_url
        self.access_key_id = access_key_id
        self.secret_access_key = secret_access_key
        self.region_name = region_name
        # Both handles start empty; get_s3() used to read an attribute that
        # was never assigned, which raised AttributeError on every call.
        self.s3 = None
        self.s3_client = None

    def connect(self):
        """
        Establish connection to S3 service.

        Creates both the client and the resource object so that either
        getter can be used afterwards.

        Returns
        -------
        bool
            True if connection was successful, False otherwise
        """
        connection_kwargs = {
            'endpoint_url': self.endpoint_url,
            'aws_access_key_id': self.access_key_id,
            'aws_secret_access_key': self.secret_access_key,
            'region_name': self.region_name,
        }
        try:
            client = boto3.client('s3', **connection_kwargs)
            resource = boto3.resource('s3', **connection_kwargs)
        except Exception as e:
            print(f"Connection failed: {e}")
            return False

        # Assign only after both objects were built, so a failure never
        # leaves the connector half-initialised.
        self.s3_client = client
        self.s3 = resource
        return True

    def get_s3(self):
        """
        Return the S3 resource object.

        If not already connected, this method will first establish a connection.

        Returns
        -------
        boto3.resources.factory.s3.ServiceResource
            The boto3 S3 resource object for interacting with S3 storage,
            or None if the connection attempt failed
        """
        if self.s3 is None:
            self.connect()
        return self.s3

    def get_s3_client(self):
        """
        Return the S3 client object.

        If not already connected, this method will first establish a connection.

        Returns
        -------
        boto3.client.S3
            The boto3 S3 client object for interacting with S3 storage,
            or None if the connection attempt failed
        """
        if self.s3_client is None:
            self.connect()
        return self.s3_client
99
+
100
+
101
def extract_s3_path_from_url(url):
    """
    Return the object key portion of an S3 URI.

    The scheme (``s3://``), the bucket name and any leading slashes are
    dropped. Input that does not start with ``s3://`` is handed back
    untouched.

    Args:
        url (str): The full S3 URI (e.g., 's3://eodata/path/to/file.jp2')

    Returns:
        str: The S3 object path (without protocol, bucket name and leading slashes)

    Raises:
        ValueError: If the parsed scheme of an ``s3://``-prefixed input is not ``s3``
    """
    is_s3_uri = url.startswith('s3://')
    if is_s3_uri:
        parts = urlparse(url)

        # Defensive check on the parsed scheme
        if parts.scheme != 's3':
            raise ValueError(f"URL {url} is not an S3 URL")

        # The bucket lands in the netloc, so only the key remains in the path
        return parts.path.lstrip('/')

    # Anything else is passed through unchanged
    return url
129
+
130
+
131
class ProductDownloader:
    """
    Class for downloading products from Copernicus Data Space Ecosystem.

    This class provides methods to download products using an S3 client connection,
    either as in-memory content (bytes) or as files saved to disk. Both methods
    accept either a bare S3 key or a full ``s3://`` URI.
    """

    def __init__(self, s3_client, bucket_name='eodata'):
        """
        Initialize the product downloader with an S3 client.

        Args:
            s3_client: The boto3 S3 client to use for downloads
            bucket_name (str): The S3 bucket name where products are stored (default: 'eodata')
        """
        self.s3_client = s3_client
        self.bucket_name = bucket_name

    @staticmethod
    def get_s3_key_from_href(href):
        """
        Convert an ``s3://`` URI into the object key inside its bucket.

        Args:
            href (str): S3 key or full S3 URI (e.g., 's3://eodata/path/to/file.jp2')

        Returns:
            str: The key without scheme, bucket name and leading slashes.
                Input that does not start with 's3://' is returned unchanged.
        """
        if not href.startswith('s3://'):
            return href
        # urlparse puts the bucket in the netloc, leaving only the key in path
        return urlparse(href).path.lstrip('/')

    def get_product_content(self, product_path):
        """
        Download a Sentinel-2 product directly from Copernicus Data Space Ecosystem as a bytes object.

        Args:
            product_path (str): S3 key or full S3 URI to the product

        Returns:
            tuple: ``(content, filename)`` where ``content`` is the product as
                bytes and ``filename`` is the last path component of the key

        Raises:
            Exception: Any error raised by the S3 client is printed and re-raised
        """
        # Extract S3 key if full URI is provided
        product_path = self.get_s3_key_from_href(product_path)

        # Extract the filename from the path
        filename = os.path.basename(product_path)

        try:
            buffer = io.BytesIO()
            self.s3_client.download_fileobj(self.bucket_name, product_path, buffer)
        except Exception as e:
            print(f"Error downloading product: {str(e)}")
            raise

        # getvalue() returns the whole buffer regardless of the stream position
        product_content = buffer.getvalue()
        print(f"Successfully downloaded product: {filename}")
        return product_content, filename

    def download_product(self, product_path, output_filename=None):
        """
        Download a Sentinel-2 product directly from Copernicus Data Space Ecosystem to disk.

        Args:
            product_path (str): S3 key or full S3 URI to the product
            output_filename (str, optional): Filename to save the product to.
                If None, uses the original filename.

        Returns:
            str: Path to the downloaded file

        Raises:
            Exception: Any error raised by the S3 client is printed and re-raised
        """
        # Extract S3 key if full URI is provided
        product_path = self.get_s3_key_from_href(product_path)

        # Extract the filename from the path
        filename = os.path.basename(product_path)

        # Use custom filename if provided, otherwise use the original
        if output_filename is None:
            output_filename = filename

        try:
            self.s3_client.download_file(self.bucket_name, product_path, output_filename)
        except Exception as e:
            print(f"Error downloading product: {str(e)}")
            raise

        print(f"Successfully downloaded product {filename} to {output_filename}")
        return output_filename
221
+
222
+
223
+
224
if __name__ == "__main__":
    # Example run: search the Copernicus STAC catalogue for Sentinel-2 L2A
    # items and download each item's 10 m true-colour image to the current
    # working directory.

    # Get credentials from environment variables
    ACCESS_KEY_ID = os.environ.get("ACCESS_KEY_ID")
    SECRET_ACCESS_KEY = os.environ.get("SECRET_ACCESS_KEY")
    if not ACCESS_KEY_ID or not SECRET_ACCESS_KEY:
        # Only warn: the values are still passed on as-is, exactly as before
        print("Warning: ACCESS_KEY_ID and/or SECRET_ACCESS_KEY are not set")

    ENDPOINT_URL = 'https://eodata.dataspace.copernicus.eu'
    ENDPOINT_STAC = "https://stac.dataspace.copernicus.eu/v1/"

    # Initialize the connector
    s3_connector = S3Connector(
        endpoint_url=ENDPOINT_URL,
        access_key_id=ACCESS_KEY_ID,
        secret_access_key=SECRET_ACCESS_KEY
    )
    # Connect to S3; stop here instead of failing later on a None client
    if not s3_connector.connect():
        raise SystemExit("Could not create the S3 client")
    s3_client = s3_connector.get_s3_client()

    catalog = pystac_client.Client.open(ENDPOINT_STAC)

    # Define bounding box coordinates [min_lon, min_lat, max_lon, max_lat]
    # (to search around a single point instead, replace `bbox=` with
    # `intersects=dict(type="Point", coordinates=[lon, lat])`)
    bbox = [150.47, -21.42, 151.47, -20.42]  # 1° box around LON=150.97, LAT=-20.92

    # Search for Sentinel-2 products within the bounding box
    items_txt = catalog.search(
        collections=['sentinel-2-l2a'],
        bbox=bbox,
        datetime="2024-05-01/2024-06-01",
        query=["eo:cloud_cover<50"]
    ).item_collection()

    # The downloader does not depend on the item, so build it once
    handler = ProductDownloader(s3_client=s3_client, bucket_name='eodata')

    for item in items_txt:
        product_url = extract_s3_path_from_url(item.assets['TCI_10m'].href)
        print(product_url)

        # Download the image to a file
        # (handler.get_product_content(product_url) returns the bytes instead)
        downloaded_file = handler.download_product(product_url)
        print(f"Downloaded file saved to {downloaded_file}")
293
+