File size: 889 Bytes
cd6f412
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import hashlib
import re
from urllib.parse import urlparse
from pathlib import Path

def get_content_hash(content: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of the given binary content."""
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()

def sanitize_filename(url: str, max_length: int = 150) -> str:
    """
    Creates a sanitized, readable filename from a URL.
    e.g., 'https://example.com/path/to/file.html?query=1' -> 'path_to_file.html'

    The URL path (without leading/trailing slashes) is used as the base name.
    If the path is empty or consists only of dots/spaces (e.g. '/..'), the
    network location is used instead. The query string and fragment are ignored.

    Args:
        url: The URL to derive a filename from.
        max_length: Maximum length of the returned name. When the name has to
            be shortened and ends in a short alphanumeric extension, the
            extension is kept and the stem is shortened instead.

    Returns:
        A non-empty filename of at most ``max_length`` characters containing
        none of the characters ``<>:"/\\|?*`` or ASCII control characters.
        Falls back to ``'unnamed'`` when nothing usable can be derived.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    parsed_url = urlparse(url)
    # Use the path, but remove leading/trailing slashes
    path_part = parsed_url.path.strip('/')

    if not path_part.strip('. '):
        # Path is empty (e.g., domain.com/) or only dots/spaces ('.' or '..'
        # would name a directory, not a file), so use the netloc instead.
        path_part = parsed_url.netloc

    # Replace slashes, other reserved characters and control characters
    # (including NUL, which no filesystem accepts) with underscores.
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', path_part)

    # Limit length to avoid OS errors, keeping the extension where possible.
    if len(sanitized) > max_length:
        stem, dot, ext = sanitized.rpartition('.')
        keep = max_length - len(ext) - 1
        # Only treat the tail as an extension when it looks like one; a dot
        # inside a flattened directory name (e.g. 'v1.2_...') must not count.
        if dot and stem and ext.isalnum() and len(ext) <= 10 and keep > 0:
            sanitized = stem[:keep] + '.' + ext
        else:
            sanitized = sanitized[:max_length]

    if not sanitized.strip('. '):
        # Nothing usable (empty URL, or only dots left after truncation).
        sanitized = 'unnamed'[:max_length]

    return sanitized