Spaces:
Sleeping
Sleeping
File size: 889 Bytes
import hashlib
import re
from urllib.parse import urlparse
from pathlib import Path
def get_content_hash(content: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of ``content``.

    Args:
        content: Raw bytes to fingerprint.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.
    """
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()
def sanitize_filename(url: str) -> str:
    """Create a sanitized, readable filename from a URL.

    The URL's path (without leading/trailing slashes) is used as the base
    name; when the path is empty the network location is used instead. The
    query string and fragment are ignored.

    e.g., 'https://example.com/path/to/file.html?query=1' -> 'path_to_file.html'

    Args:
        url: The URL to derive a filename from.

    Returns:
        A name of at most 150 characters in which path separators, the
        characters ``< > : " | ? *`` and ASCII control characters are
        replaced by underscores. If that leaves nothing usable (an empty
        string, or a name made only of dots and spaces such as ``..``),
        the literal ``'unnamed'`` is returned instead.
    """
    parsed_url = urlparse(url)

    # Prefer the path; fall back to the host for bare-domain URLs.
    path_part = parsed_url.path.strip('/') or parsed_url.netloc

    # Replace separators, reserved punctuation and control characters
    # (including NUL, which open() rejects) with underscores.
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', path_part)

    # Limit length to avoid OS errors.
    sanitized = sanitized[:150]

    # '', '.' and '..' are not usable file names ('..' would even escape the
    # target directory when joined to it), so substitute a fixed placeholder.
    # Checked after truncation because cutting a long name can leave only dots.
    if not sanitized.strip('. '):
        return 'unnamed'
    return sanitized