Spaces:
Sleeping
Sleeping
| from urllib.parse import urlparse | |
def extract_s3_path_from_url(url):
    """
    Extract the S3 object path (key) from an S3 URI.

    The 's3://' protocol prefix, the bucket name and any leading slashes are
    removed. Input that does not start with 's3://' is assumed to already be
    a plain path and is returned unchanged (leading slashes included).

    Args:
        url (str): The full S3 URI (e.g., 's3://eodata/path/to/file.jp2'),
            or an already-extracted path.

    Returns:
        str: The S3 object path without protocol, bucket name and leading
            slashes. Empty string when the URI names only a bucket.
    """
    s3_prefix = 's3://'

    # Anything that is not an S3 URI is passed through untouched.
    if not url.startswith(s3_prefix):
        return url

    # Split on the first '/' after the bucket name instead of using
    # urlparse: object keys may legitimately contain '?' and '#', which a
    # generic URL parser would strip off as query string / fragment and
    # silently truncate the key.
    _bucket, _sep, object_path = url[len(s3_prefix):].partition('/')

    # Collapse any extra leading slashes (e.g. 's3://bucket//key').
    return object_path.lstrip('/')