| """ |
| URL处理辅助函数 |
| |
| 提供URL解析和路径提取功能,用于导航验证中的域名无关匹配。 |
| """ |
|
|
| from urllib.parse import urlparse |
|
|
|
|
def extract_url_path(url: str) -> str:
    """
    Return the path, path parameters, query and fragment of a URL.

    The scheme and network location are discarded so that two URLs can be
    compared independently of the domain they are served from (for example
    when a navigation is redirected to another host).

    Args:
        url: Full URL string.

    Returns:
        Path + ";params" + "?query" + "#fragment", each part included only
        when it is non-empty (e.g. "/apps/drive/123?param=value#section").
        An empty string is returned when ``url`` is empty or cannot be
        parsed.

    Examples:
        >>> extract_url_path("https://ai.studio/apps/drive/123?param=value")
        '/apps/drive/123?param=value'

        >>> extract_url_path("https://aistudio.google.com/apps/drive/123")
        '/apps/drive/123'

        >>> extract_url_path("https://example.com/path")
        '/path'
    """
    if not url:
        return ""

    try:
        parsed = urlparse(url)
        result = parsed.path
        # urlparse() strips ";params" from the last path segment; put it
        # back so URLs that differ only in path parameters stay distinct.
        if parsed.params:
            result += ';' + parsed.params
        if parsed.query:
            result += '?' + parsed.query
        if parsed.fragment:
            result += '#' + parsed.fragment
        return result
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed URL (e.g. unbalanced IPv6 brackets).
        # TypeError / AttributeError: a non-str value was passed in.
        # Best-effort contract: an unparsable URL yields an empty string.
        return ""
|
|
|
|
def mask_path_for_logging(path: str) -> str:
    """
    Mask the drive id inside a URL path so it can be written to logs.

    Masking rules:
        1. For ``/apps/drive/<id>`` paths whose id is longer than 8
           characters, keep the first 4 and last 4 characters of the id and
           replace the middle with ``***``.
        2. Any other path (or a shorter id) is returned unchanged.

    A trailing ``?query`` and/or ``#fragment`` is not treated as part of the
    id: it is split off before masking and re-attached verbatim afterwards.

    Args:
        path: URL path string, optionally followed by a query string and/or
            fragment.

    Returns:
        The masked path string, or an empty string when ``path`` is empty.

    Examples:
        >>> mask_path_for_logging("/apps/drive/abcdef123456")
        '/apps/drive/abcd***3456'

        >>> mask_path_for_logging("/apps/drive/xyz789")
        '/apps/drive/xyz789'

        >>> mask_path_for_logging("/other/path")
        '/other/path'

        >>> mask_path_for_logging("/apps/drive/abcdef123456?param=value")
        '/apps/drive/abcd***3456?param=value'
    """
    if not path:
        return ""

    # Locate where the pure path ends: at the first "?" or "#", if any.
    # Without this the query/fragment would be counted as part of the id.
    cut = len(path)
    for delimiter in ("?", "#"):
        position = path.find(delimiter)
        if position != -1:
            cut = min(cut, position)
    core, suffix = path[:cut], path[cut:]

    prefix = "/apps/drive/"
    if not core.startswith(prefix):
        return path

    # The id is the segment right after the prefix; anything after the next
    # "/" (sub-paths) is preserved untouched.
    drive_id, separator, remainder = core[len(prefix):].partition("/")
    if len(drive_id) <= 8:
        # Too short to mask meaningfully: leave as is.
        return path

    masked_id = f"{drive_id[:4]}***{drive_id[-4:]}"
    return f"{prefix}{masked_id}{separator}{remainder}{suffix}"
|
|
|
|
def mask_url_for_logging(url: str) -> str:
    """
    Mask the drive id inside a full URL so it can be written to logs.

    Masking rules:
        1. For URLs whose path is ``/apps/drive/<id>`` and whose id is
           longer than 8 characters, keep the first 4 and last 4 characters
           of the id and replace the middle with ``***``.
        2. Any other URL (or a shorter id) is returned unchanged.

    Only the path component is rewritten; scheme, network location, path
    parameters, query and fragment are carried over from the parsed URL.
    Scheme-less and protocol-relative inputs are rebuilt without inventing a
    ``://`` prefix.

    Args:
        url: Full URL string.

    Returns:
        The masked URL string, an empty string when ``url`` is empty, or the
        original ``url`` when it cannot be parsed.

    Examples:
        >>> mask_url_for_logging("https://ai.studio/apps/drive/abcdef123456")
        'https://ai.studio/apps/drive/abcd***3456'

        >>> mask_url_for_logging("https://aistudio.google.com/apps/drive/xyz789")
        'https://aistudio.google.com/apps/drive/xyz789'

        >>> mask_url_for_logging("https://example.com/other/path")
        'https://example.com/other/path'
    """
    if not url:
        return ""

    try:
        parsed = urlparse(url)

        prefix = "/apps/drive/"
        if not parsed.path.startswith(prefix):
            return url

        # The id is the segment right after the prefix; deeper sub-paths
        # are preserved untouched.
        drive_id, separator, remainder = parsed.path[len(prefix):].partition("/")
        if len(drive_id) <= 8:
            # Too short to mask meaningfully: leave the URL as is.
            return url

        masked_path = (
            f"{prefix}{drive_id[:4]}***{drive_id[-4:]}{separator}{remainder}"
        )
        # Let urllib reassemble the URL: this keeps ";params", and handles
        # empty scheme/netloc correctly instead of emitting ":///...".
        return parsed._replace(path=masked_path).geturl()
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed URL; TypeError / AttributeError: non-str
        # input. Best-effort contract: fall back to the input unchanged.
        return url
|
|