|
|
""" |
|
|
URL处理辅助函数 |
|
|
|
|
|
提供URL解析和路径提取功能,用于导航验证中的域名无关匹配。 |
|
|
""" |
|
|
|
|
|
from urllib.parse import urlparse |
|
|
|
|
|
|
|
|
def extract_url_path(url: str) -> str: |
|
|
""" |
|
|
提取URL的路径和查询参数部分,忽略协议和域名差异 |
|
|
|
|
|
用于验证导航是否到达正确页面,允许域名重定向。 |
|
|
|
|
|
Args: |
|
|
url: 完整URL字符串 |
|
|
|
|
|
Returns: |
|
|
路径+查询参数+片段(例如:"/apps/drive/123?param=value#section") |
|
|
如果URL为空或无效,返回空字符串 |
|
|
|
|
|
Examples: |
|
|
>>> extract_url_path("https://ai.studio/apps/drive/123?param=value") |
|
|
'/apps/drive/123?param=value' |
|
|
|
|
|
>>> extract_url_path("https://aistudio.google.com/apps/drive/123") |
|
|
'/apps/drive/123' |
|
|
|
|
|
>>> extract_url_path("https://example.com/path") |
|
|
'/path' |
|
|
""" |
|
|
if not url: |
|
|
return "" |
|
|
|
|
|
try: |
|
|
parsed = urlparse(url) |
|
|
result = parsed.path |
|
|
if parsed.query: |
|
|
result += '?' + parsed.query |
|
|
if parsed.fragment: |
|
|
result += '#' + parsed.fragment |
|
|
return result |
|
|
except Exception: |
|
|
|
|
|
return "" |
|
|
|
|
|
|
|
|
def mask_path_for_logging(path: str) -> str: |
|
|
""" |
|
|
对路径进行脱敏处理,用于日志输出 |
|
|
|
|
|
脱敏规则: |
|
|
1. 对于 /apps/drive/XXXXXXXXXX 路径,保留头4位和尾4位,中间用***代替 |
|
|
2. 如果不是 /apps/drive/XXXXXXXXXX 路径,返回完整路径 |
|
|
|
|
|
Args: |
|
|
path: URL路径字符串 |
|
|
|
|
|
Returns: |
|
|
脱敏后的路径字符串 |
|
|
|
|
|
Examples: |
|
|
>>> mask_path_for_logging("/apps/drive/abcdef123456") |
|
|
'/apps/drive/abcd***3456' |
|
|
|
|
|
>>> mask_path_for_logging("/apps/drive/xyz789") |
|
|
'/apps/drive/xyz789' |
|
|
|
|
|
>>> mask_path_for_logging("/other/path") |
|
|
'/other/path' |
|
|
""" |
|
|
if not path: |
|
|
return "" |
|
|
|
|
|
|
|
|
if path.startswith('/apps/drive/'): |
|
|
|
|
|
path_parts = path.split('/') |
|
|
if len(path_parts) >= 4: |
|
|
drive_id = path_parts[3] |
|
|
|
|
|
|
|
|
if len(drive_id) > 8: |
|
|
|
|
|
masked_id = f"{drive_id[:4]}***{drive_id[-4:]}" |
|
|
|
|
|
masked_parts = path_parts[:3] + [masked_id] + path_parts[4:] |
|
|
return '/'.join(masked_parts) |
|
|
|
|
|
|
|
|
return path |
|
|
|
|
|
|
|
|
def mask_url_for_logging(url: str) -> str: |
|
|
""" |
|
|
对URL进行脱敏处理,用于日志输出 |
|
|
|
|
|
脱敏规则: |
|
|
1. 对于 /apps/drive/XXXXXXXXXX 路径,保留头4位和尾4位,中间用***代替 |
|
|
2. 如果不是 /apps/drive/XXXXXXXXXX 路径,返回完整URL |
|
|
|
|
|
Args: |
|
|
url: 完整URL字符串 |
|
|
|
|
|
Returns: |
|
|
脱敏后的URL字符串 |
|
|
|
|
|
Examples: |
|
|
>>> mask_url_for_logging("https://ai.studio/apps/drive/abcdef123456") |
|
|
'https://ai.studio/apps/drive/abcd***3456' |
|
|
|
|
|
>>> mask_url_for_logging("https://aistudio.google.com/apps/drive/xyz789") |
|
|
'https://aistudio.google.com/apps/drive/xyz789' |
|
|
|
|
|
>>> mask_url_for_logging("https://example.com/other/path") |
|
|
'https://example.com/other/path' |
|
|
""" |
|
|
if not url: |
|
|
return "" |
|
|
|
|
|
try: |
|
|
parsed = urlparse(url) |
|
|
|
|
|
|
|
|
if parsed.path.startswith('/apps/drive/'): |
|
|
|
|
|
path_parts = parsed.path.split('/') |
|
|
if len(path_parts) >= 4: |
|
|
drive_id = path_parts[3] |
|
|
|
|
|
|
|
|
if len(drive_id) > 8: |
|
|
masked_id = f"{drive_id[:4]}***{drive_id[-4:]}" |
|
|
|
|
|
masked_parts = path_parts[:3] + [masked_id] + path_parts[4:] |
|
|
masked_path = '/'.join(masked_parts) |
|
|
|
|
|
|
|
|
result = f"{parsed.scheme}://{parsed.netloc}{masked_path}" |
|
|
if parsed.query: |
|
|
result += '?' + parsed.query |
|
|
if parsed.fragment: |
|
|
result += '#' + parsed.fragment |
|
|
return result |
|
|
|
|
|
|
|
|
return url |
|
|
|
|
|
except Exception: |
|
|
|
|
|
return url |
|
|
|