File size: 838 Bytes
4ccde7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from urllib.parse import urlparse
from app.core.exceptions import InvalidDocumentURLError


def validate_document_url(url: str) -> str:
    """Validate that the given string is a well-formed http(s) URL.

    Surrounding whitespace is stripped before validation. Every rejection is
    reported as ``InvalidDocumentURLError``; parser-level ``ValueError``s
    raised by ``urllib.parse`` are translated rather than leaked.

    Args:
        url: The URL to validate.

    Returns:
        The URL with leading and trailing whitespace removed.

    Raises:
        InvalidDocumentURLError: If ``url`` is not a string, is empty or
            whitespace-only, cannot be parsed, uses a scheme other than
            http/https, has no host name, or has an invalid port.
    """
    if not isinstance(url, str):
        raise InvalidDocumentURLError("Document URL must be a non-empty string.")

    url = url.strip()
    # Check emptiness after stripping so whitespace-only input gets the
    # accurate message instead of an "unsupported scheme ''" error.
    if not url:
        raise InvalidDocumentURLError("Document URL must be a non-empty string.")

    try:
        parsed = urlparse(url)
        # netloc can be non-empty without a host (e.g. ":8080" or "user@"),
        # so the host name itself is what must be checked.
        hostname = parsed.hostname
        # Reading .port validates it (non-numeric / out-of-range raises).
        _ = parsed.port
    except ValueError as err:
        raise InvalidDocumentURLError(
            f"Document URL is malformed: {err}"
        ) from err

    if parsed.scheme not in {"http", "https"}:
        raise InvalidDocumentURLError(
            f"Unsupported URL scheme: '{parsed.scheme}'. Use http or https."
        )
    if not hostname:
        raise InvalidDocumentURLError("Document URL is missing a valid host.")

    return url