rodolphethinks1 committed on
Commit
edb5ced
·
verified ·
1 Parent(s): 1bb2bbf

Update src/utils/stac_client.py

Browse files
Files changed (1) hide show
  1. src/utils/stac_client.py +76 -108
src/utils/stac_client.py CHANGED
@@ -4,110 +4,78 @@ from datetime import datetime, timedelta
4
  import random
5
  import requests
6
  from pystac_client import Client
7
- import os
8
 
9
  from src.auth.auth import get_direct_access_token
10
  from src.utils.image import extract_url_after_filename
11
 
12
class ProductDownloader:
    """
    Class for downloading products from Copernicus Data Space Ecosystem.

    This class provides methods to download products using an S3 client connection,
    either as in-memory content (bytes) or as files saved to disk.
    """

    def __init__(self, s3_client, bucket_name='eodata'):
        """
        Initialize the product downloader with an S3 client.

        Args:
            s3_client: The boto3 S3 client to use for downloads
            bucket_name (str): The S3 bucket name where products are stored (default: 'eodata')
        """
        self.s3_client = s3_client
        self.bucket_name = bucket_name

    def get_s3_key_from_href(self, href):
        """
        Convert a full S3 URI into the object key inside the bucket.

        Args:
            href (str): Either a full URI of the form ``s3://<bucket>/<key>`` or
                an object key.

        Returns:
            str: The part of the URI after the bucket name. A value that does
                not start with ``s3://`` is returned unchanged.
        """
        scheme = 's3://'
        if not href.startswith(scheme):
            return href
        # Drop the scheme, then everything up to and including the first '/'
        # (the bucket name); what remains is the object key.
        _, _, key = href[len(scheme):].partition('/')
        return key

    def get_product_content(self, product_path):
        """
        Download a product from the configured bucket as a bytes object.

        Args:
            product_path (str): S3 key or full S3 URI to the product

        Returns:
            tuple: ``(content, filename)`` where ``content`` is the product as
                bytes and ``filename`` is the last path component of the key.

        Raises:
            Exception: Any error raised by the S3 client is printed and re-raised.
        """
        # Extract S3 key if full URI is provided
        product_path = self.get_s3_key_from_href(product_path)

        # Extract the filename from the path
        filename = os.path.basename(product_path)

        try:
            # Let the client write the object into an in-memory buffer
            buffer = io.BytesIO()
            self.s3_client.download_fileobj(self.bucket_name, product_path, buffer)

            # getvalue() returns the whole buffer regardless of the stream position
            product_content = buffer.getvalue()

            print(f"Successfully downloaded product: {filename}")
            return product_content, filename
        except Exception as e:
            print(f"Error downloading product: {str(e)}")
            raise

    async def download_product(self, product_path, output_filename=None):
        """
        Download a product from the configured bucket to disk.

        Args:
            product_path (str): S3 key or full S3 URI to the product
            output_filename (str, optional): Filename to save the product to.
                If None, uses the original filename.

        Returns:
            str: Path to the downloaded file

        Raises:
            Exception: Any error raised by the S3 client is printed and re-raised.
        """
        # Extract S3 key if full URI is provided
        product_path = self.get_s3_key_from_href(product_path)

        # Extract the filename from the path
        filename = os.path.basename(product_path)

        # Use custom filename if provided, otherwise use the original
        if output_filename is None:
            output_filename = filename

        try:
            # NOTE: the client call is made synchronously (it is not awaited),
            # so this coroutine does not yield while the transfer runs.
            self.s3_client.download_file(self.bucket_name, product_path, output_filename)
            print(f"Successfully downloaded product {filename} to {output_filename}")
            return output_filename
        except Exception as e:
            print(f"Error downloading product: {str(e)}")
            raise
102
 
103
- from src.auth.auth import get_direct_access_token
104
- from src.utils.image import extract_url_after_filename
105
 
106
  def download_sentinel_image(username, password, start_date, end_date,
107
  bbox=[-180, -90, 180, 90], limit=10):
108
  """
109
  Download a random Sentinel-2 image based on criteria.
110
-
111
  Args:
112
  username (str): DESTINE username
113
  password (str): DESTINE password
@@ -115,7 +83,7 @@ def download_sentinel_image(username, password, start_date, end_date,
115
  cloud_cover (int, optional): Maximum cloud cover percentage
116
  bbox (list): Bounding box coordinates [west, south, east, north]
117
  limit (int): Maximum number of results to return
118
-
119
  Returns:
120
  tuple: (image_content or error_message, metadata)
121
  """
@@ -130,14 +98,14 @@ def download_sentinel_image(username, password, start_date, end_date,
130
  stac_base_url = "https://cachea.destine.eu"
131
  stac_url = f"{stac_base_url}/stac/api"
132
  catalog = Client.open(stac_url)
133
-
134
  start_date = datetime.strptime(start_date, "%Y-%m-%d")
135
  end_date = datetime.strptime(end_date, "%Y-%m-%d")
136
  days_between = (end_date - start_date).days
137
  random_start_day = random.randint(0, days_between - 7) # Ensure we have 7 days
138
  random_start_date = start_date + timedelta(days=random_start_day)
139
  random_end_date = random_start_date + timedelta(days=1)
140
-
141
  # Format dates for the API
142
  start_date_str = random_start_date.strftime("%Y-%m-%d")
143
  end_date_str = random_end_date.strftime("%Y-%m-%d")
@@ -150,49 +118,49 @@ def download_sentinel_image(username, password, start_date, end_date,
150
  "datetime": f"{start_date_str}/{end_date_str}",
151
  "limit": limit
152
  }
153
-
154
 
155
  # Search for Sentinel-2 images
156
  search = catalog.search(**search_params)
157
-
158
  # Get a list of items
159
  items = list(search.items())
160
  if not items:
161
  return "No Sentinel-2 images found", None
162
-
163
  # Select a random item
164
  random_item = random.choice(items)
165
-
166
  # Get metadata for the selected item
167
  metadata = {
168
  "id": random_item.id,
169
  "datetime": random_item.datetime.strftime("%Y-%m-%d %H:%M:%S"),
170
  "bbox": random_item.bbox,
171
  }
172
-
173
-
174
  # Get the assets of the random item
175
  assets = random_item.assets
176
  asset_keys = list(assets.keys())
177
-
178
  # Filter the assets to get the one that ends with *_TCI_60m.jp2
179
  tci_assets = [assets[key].href for key in asset_keys if assets[key].href.endswith('_TCI_60m.jp2')]
180
-
181
  if not tci_assets:
182
  return "No TCI assets found in the selected image", None
183
-
184
  filepath = extract_url_after_filename(tci_assets[0])
185
  metadata["filename"] = os.path.basename(filepath)
186
-
187
  # Download the file
188
  url = f"{stac_base_url}/stac/download?filename={filepath}"
189
-
190
  headers = {
191
  'Authorization': f'Bearer {access_token}'
192
  }
193
-
194
  response = requests.post(url, headers=headers, data={})
195
-
196
  if response.status_code == 200:
197
  return response.content, metadata
198
  else:
 
4
  import random
5
  import requests
6
  from pystac_client import Client
7
+ import os
8
 
9
  from src.auth.auth import get_direct_access_token
10
  from src.utils.image import extract_url_after_filename
11
 
12
+
13
def get_product_content(s3_client, bucket_name, object_url):
    """
    Download the content of a product from S3 bucket.

    Args:
        s3_client: boto3 S3 client object
        bucket_name (str): Name of the S3 bucket
        object_url (str): Path to the object within the bucket

    Returns:
        bytes: Content of the downloaded file

    Raises:
        Exception: Any error raised while fetching or reading the object is
            printed and re-raised unchanged.
    """
    print(f"Downloading {object_url}")

    try:
        # Download the file from S3
        response = s3_client.get_object(Bucket=bucket_name, Key=object_url)
        body = response['Body']
        try:
            content = body.read()
        finally:
            # Always release the response stream, even if read() fails part-way
            body.close()
        print(f"Successfully downloaded {object_url}")
    except Exception as e:
        print(f"Error downloading file: {str(e)}")
        raise

    return content
37
+
38
+
39
def get_product(s3_resource, bucket_name, object_url, output_path):
    """
    Download a product from S3 bucket and create output directory if it doesn't exist.

    Args:
        s3_resource: boto3 S3 resource object
        bucket_name (str): Name of the S3 bucket
        object_url (str): Path to the object within the bucket
        output_path (str): Local directory to save the file. An empty string
            means the current working directory.

    Returns:
        str: Path to the downloaded file

    Raises:
        Exception: Any error raised by the download call is printed and
            re-raised unchanged.
    """
    # Create output directory if it doesn't exist. os.makedirs('') raises
    # FileNotFoundError, so an empty path (current directory) is skipped.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    # Extract filename from the object URL
    filename = os.path.basename(object_url)

    # Full path where the file will be saved
    local_file_path = os.path.join(output_path, filename)

    print(f"Downloading {object_url} to {local_file_path}...")

    try:
        # Download the file from S3
        s3_resource.Bucket(bucket_name).download_file(object_url, local_file_path)
        print(f"Successfully downloaded to {local_file_path}")
    except Exception as e:
        print(f"Error downloading file: {str(e)}")
        raise

    return local_file_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
 
 
73
 
74
  def download_sentinel_image(username, password, start_date, end_date,
75
  bbox=[-180, -90, 180, 90], limit=10):
76
  """
77
  Download a random Sentinel-2 image based on criteria.
78
+
79
  Args:
80
  username (str): DESTINE username
81
  password (str): DESTINE password
 
83
  cloud_cover (int, optional): Maximum cloud cover percentage
84
  bbox (list): Bounding box coordinates [west, south, east, north]
85
  limit (int): Maximum number of results to return
86
+
87
  Returns:
88
  tuple: (image_content or error_message, metadata)
89
  """
 
98
  stac_base_url = "https://cachea.destine.eu"
99
  stac_url = f"{stac_base_url}/stac/api"
100
  catalog = Client.open(stac_url)
101
+
102
  start_date = datetime.strptime(start_date, "%Y-%m-%d")
103
  end_date = datetime.strptime(end_date, "%Y-%m-%d")
104
  days_between = (end_date - start_date).days
105
  random_start_day = random.randint(0, days_between - 7) # Ensure we have 7 days
106
  random_start_date = start_date + timedelta(days=random_start_day)
107
  random_end_date = random_start_date + timedelta(days=1)
108
+
109
  # Format dates for the API
110
  start_date_str = random_start_date.strftime("%Y-%m-%d")
111
  end_date_str = random_end_date.strftime("%Y-%m-%d")
 
118
  "datetime": f"{start_date_str}/{end_date_str}",
119
  "limit": limit
120
  }
121
+
122
 
123
  # Search for Sentinel-2 images
124
  search = catalog.search(**search_params)
125
+
126
  # Get a list of items
127
  items = list(search.items())
128
  if not items:
129
  return "No Sentinel-2 images found", None
130
+
131
  # Select a random item
132
  random_item = random.choice(items)
133
+
134
  # Get metadata for the selected item
135
  metadata = {
136
  "id": random_item.id,
137
  "datetime": random_item.datetime.strftime("%Y-%m-%d %H:%M:%S"),
138
  "bbox": random_item.bbox,
139
  }
140
+
141
+
142
  # Get the assets of the random item
143
  assets = random_item.assets
144
  asset_keys = list(assets.keys())
145
+
146
  # Filter the assets to get the one that ends with *_TCI_60m.jp2
147
  tci_assets = [assets[key].href for key in asset_keys if assets[key].href.endswith('_TCI_60m.jp2')]
148
+
149
  if not tci_assets:
150
  return "No TCI assets found in the selected image", None
151
+
152
  filepath = extract_url_after_filename(tci_assets[0])
153
  metadata["filename"] = os.path.basename(filepath)
154
+
155
  # Download the file
156
  url = f"{stac_base_url}/stac/download?filename={filepath}"
157
+
158
  headers = {
159
  'Authorization': f'Bearer {access_token}'
160
  }
161
+
162
  response = requests.post(url, headers=headers, data={})
163
+
164
  if response.status_code == 200:
165
  return response.content, metadata
166
  else: