Deep Chavda
feat: initial release — PDF to Markdown MCP server
4ccde7a
raw
history blame contribute delete
838 Bytes
from urllib.parse import urlparse
from app.core.exceptions import InvalidDocumentURLError
def validate_document_url(url: str) -> str:
    """Validate that the given string is a well-formed http(s) URL.

    Leading and trailing whitespace is stripped before any check is made, and
    the stripped value is what gets returned.

    Args:
        url: The URL to validate.

    Returns:
        The trimmed URL.

    Raises:
        InvalidDocumentURLError: If the value is not a string, is empty or
            whitespace-only, cannot be parsed as a URL, uses a scheme other
            than http/https, or has no host component.
    """
    empty_message = "Document URL must be a non-empty string."

    if not isinstance(url, str):
        raise InvalidDocumentURLError(empty_message)

    # Strip first so whitespace-only input is reported as empty rather than
    # as an "unsupported scheme" further down.
    url = url.strip()
    if not url:
        raise InvalidDocumentURLError(empty_message)

    # urlparse raises ValueError for some malformed inputs (for example,
    # unbalanced IPv6 brackets); translate it so callers only ever have to
    # handle InvalidDocumentURLError.
    try:
        parsed = urlparse(url)
    except ValueError as err:
        raise InvalidDocumentURLError(f"Malformed document URL: {err}") from err

    if parsed.scheme not in {"http", "https"}:
        raise InvalidDocumentURLError(
            f"Unsupported URL scheme: '{parsed.scheme}'. Use http or https."
        )

    # netloc is non-empty for values like ":80" or "user@" that carry no
    # host at all; hostname is None in those cases.
    if not parsed.hostname:
        raise InvalidDocumentURLError("Document URL is missing a valid host.")

    return url