Spaces:
Running
Running
File size: 4,616 Bytes
4047340 2011b89 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
"""
URL处理辅助函数
提供URL解析和路径提取功能,用于导航验证中的域名无关匹配。
"""
from urllib.parse import urlparse
def extract_url_path(url: str) -> str:
    """
    Return the domain-independent part of a URL.

    The scheme and host are discarded so that two URLs pointing at the same
    page on different domains (for example after a redirect) compare equal.
    Everything after the host is kept: path, ``;params`` of the last path
    segment, ``?query`` and ``#fragment``.

    Args:
        url: Full URL string.

    Returns:
        Path + params + query + fragment, for example
        ``"/apps/drive/123?param=value#section"``.
        An empty string if ``url`` is empty or cannot be parsed.

    Examples:
        >>> extract_url_path("https://ai.studio/apps/drive/123?param=value")
        '/apps/drive/123?param=value'
        >>> extract_url_path("https://aistudio.google.com/apps/drive/123")
        '/apps/drive/123'
        >>> extract_url_path("https://example.com/path")
        '/path'
        >>> extract_url_path("https://example.com/a/b;v=1?q=2")
        '/a/b;v=1?q=2'
    """
    if not url:
        return ""
    try:
        parsed = urlparse(url)
        result = parsed.path
        # urlparse() strips ";params" from the last path segment into its own
        # field; put it back so distinct pages do not collapse to one path.
        if parsed.params:
            result += ';' + parsed.params
        if parsed.query:
            result += '?' + parsed.query
        if parsed.fragment:
            result += '#' + parsed.fragment
        return result
    except Exception:
        # Deliberately best-effort: a malformed URL (e.g. an unterminated
        # IPv6 literal makes urlparse raise ValueError) yields "".
        return ""
def mask_path_for_logging(path: str) -> str:
    """
    Mask the drive ID inside a URL path so it can be written to logs.

    Masking rules:
        1. For ``/apps/drive/<ID>`` paths whose ID is longer than 8
           characters, keep the first 4 and last 4 characters of the ID and
           replace the middle with ``***``.
        2. Any other path is returned unchanged.

    A trailing ``?query`` and/or ``#fragment`` (as produced by
    ``extract_url_path``) is not treated as part of the ID: it is split off
    before masking and re-attached untouched.

    Args:
        path: URL path string, optionally followed by a query and fragment.

    Returns:
        The masked path string, or ``""`` if ``path`` is empty.

    Examples:
        >>> mask_path_for_logging("/apps/drive/abcdef123456")
        '/apps/drive/abcd***3456'
        >>> mask_path_for_logging("/apps/drive/xyz789")
        '/apps/drive/xyz789'
        >>> mask_path_for_logging("/other/path")
        '/other/path'
        >>> mask_path_for_logging("/apps/drive/abcdef123456?param=value")
        '/apps/drive/abcd***3456?param=value'
    """
    if not path:
        return ""
    if not path.startswith('/apps/drive/'):
        return path

    # Separate the pure path from any "?query" / "#fragment" suffix so the ID
    # length check and the 4+4 slice operate on the ID alone.
    cut = len(path)
    for separator in ('?', '#'):
        position = path.find(separator)
        if position != -1:
            cut = min(cut, position)
    pure_path, suffix = path[:cut], path[cut:]

    # The prefix check above guarantees at least ['', 'apps', 'drive', ID].
    path_parts = pure_path.split('/')
    drive_id = path_parts[3]

    # Short IDs are left alone: masking them would hide nothing.
    if len(drive_id) <= 8:
        return path

    path_parts[3] = f"{drive_id[:4]}***{drive_id[-4:]}"
    return '/'.join(path_parts) + suffix
def mask_url_for_logging(url: str) -> str:
    """
    Mask the drive ID inside a URL so it can be written to logs.

    Masking rules:
        1. For URLs whose path is ``/apps/drive/<ID>`` with an ID longer
           than 8 characters, keep the first 4 and last 4 characters of the
           ID and replace the middle with ``***``.
        2. Any other URL is returned unchanged.

    Only the path is modified; scheme, host, ``;params``, query and fragment
    are carried over as parsed. Relative and protocol-relative URLs keep
    their original shape.

    Args:
        url: Full URL string.

    Returns:
        The masked URL string, ``""`` if ``url`` is empty, or ``url`` itself
        if it cannot be parsed.

    Examples:
        >>> mask_url_for_logging("https://ai.studio/apps/drive/abcdef123456")
        'https://ai.studio/apps/drive/abcd***3456'
        >>> mask_url_for_logging("https://aistudio.google.com/apps/drive/xyz789")
        'https://aistudio.google.com/apps/drive/xyz789'
        >>> mask_url_for_logging("https://example.com/other/path")
        'https://example.com/other/path'
        >>> mask_url_for_logging("/apps/drive/abcdef123456?x=1")
        '/apps/drive/abcd***3456?x=1'
    """
    if not url:
        return ""
    try:
        parsed = urlparse(url)
        if not parsed.path.startswith('/apps/drive/'):
            return url

        # The prefix check above guarantees at least ['', 'apps', 'drive', ID].
        path_parts = parsed.path.split('/')
        drive_id = path_parts[3]

        # Short IDs are left alone: masking them would hide nothing.
        if len(drive_id) <= 8:
            return url

        path_parts[3] = f"{drive_id[:4]}***{drive_id[-4:]}"
        # Let urllib reassemble the URL instead of hand-formatting
        # "scheme://netloc": that form emits ":///..." when scheme and host
        # are absent and silently drops ";params".
        return parsed._replace(path='/'.join(path_parts)).geturl()
    except Exception:
        # Deliberately best-effort: a logging helper must never raise, so an
        # unparseable URL is returned as given.
        return url
|