Spaces:
Sleeping
Sleeping
| from urllib.parse import urlparse | |
| from app.core.exceptions import InvalidDocumentURLError | |
def validate_document_url(url: str) -> str:
    """Validate that the given string is a well-formed http(s) URL.

    Leading and trailing whitespace is stripped before validation, and the
    stripped value is what gets returned.

    Args:
        url: The URL to validate.

    Returns:
        The trimmed URL.

    Raises:
        InvalidDocumentURLError: If the value is not a string, is empty or
            whitespace-only, cannot be parsed, uses a scheme other than
            http/https, or has no host component.
    """
    if not isinstance(url, str):
        raise InvalidDocumentURLError("Document URL must be a non-empty string.")

    # Strip first so a whitespace-only value is reported as empty instead of
    # falling through to the "unsupported scheme" branch.
    url = url.strip()
    if not url:
        raise InvalidDocumentURLError("Document URL must be a non-empty string.")

    try:
        parsed = urlparse(url)
        hostname = parsed.hostname
        # Reading .port forces validation of the port component; urlparse
        # itself defers that check until the attribute is accessed.
        _ = parsed.port
    except ValueError as exc:
        # urlparse signals malformed authorities (e.g. an unbalanced IPv6
        # bracket or a non-numeric port) with ValueError; translate it so
        # callers only ever see the documented exception type.
        raise InvalidDocumentURLError(
            f"Document URL is malformed: {exc}"
        ) from None

    if parsed.scheme not in {"http", "https"}:
        raise InvalidDocumentURLError(
            f"Unsupported URL scheme: '{parsed.scheme}'. Use http or https."
        )

    # netloc can be non-empty without a host ("http://:80", "http://user@"),
    # so test the extracted hostname rather than the raw netloc.
    if not hostname:
        raise InvalidDocumentURLError("Document URL is missing a valid host.")

    return url