from urllib.parse import urlparse


def extract_s3_path_from_url(url: str) -> str:
    """Return the object key portion of an S3 URI.

    The ``s3://`` scheme, the bucket name, and any leading slashes of the
    key are removed. Input that does not start with ``s3://`` is returned
    unchanged, so already-extracted paths can be passed through safely.

    The key is taken verbatim from the URI: characters such as ``?`` and
    ``#`` are legal in S3 object keys and are therefore preserved rather
    than being interpreted as a URL query string or fragment.

    Args:
        url: The full S3 URI (e.g. ``'s3://eodata/path/to/file.jp2'``),
            or any other string.

    Returns:
        The S3 object key without scheme, bucket name, or leading slashes
        (e.g. ``'path/to/file.jp2'``); an empty string when the URI names
        only a bucket; or ``url`` itself when it is not an ``s3://`` URI.
    """
    s3_prefix = 's3://'

    # Anything that is not an S3 URI is passed through untouched.
    if not url.startswith(s3_prefix):
        return url

    # Split "<bucket>/<key>" by hand instead of using urlparse: urlparse
    # would cut the key at the first '?' or '#', silently truncating keys
    # that legitimately contain those characters.
    bucket_and_key = url[len(s3_prefix):]
    _bucket, _sep, object_path = bucket_and_key.partition('/')

    # Drop any additional leading slashes (e.g. 's3://bucket//key').
    return object_path.lstrip('/')