Spaces:
Running
Running
fix(browser): mask URLs in navigation logs and clean messages
Browse files- browser/instance.py +23 -26
- browser/navigation.py +7 -4
- main.py +4 -4
- utils/url_helper.py +106 -0
browser/instance.py
CHANGED
|
@@ -8,7 +8,7 @@ from browser.cookie_validator import CookieValidator
|
|
| 8 |
from camoufox.sync_api import Camoufox
|
| 9 |
from utils.paths import logs_dir
|
| 10 |
from utils.common import parse_headless_mode, ensure_dir
|
| 11 |
-
from utils.url_helper import extract_url_path
|
| 12 |
|
| 13 |
|
| 14 |
def run_browser_instance(config, shutdown_event=None):
|
|
@@ -84,7 +84,7 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 84 |
|
| 85 |
response = None
|
| 86 |
try:
|
| 87 |
-
logger.info(f"正在导航到: {expected_url} (超时设置为 90 秒)")
|
| 88 |
# page.goto() 会返回一个 response 对象,我们可以用它来获取状态码等信息
|
| 89 |
response = page.goto(expected_url, wait_until='domcontentloaded', timeout=90000)
|
| 90 |
|
|
@@ -97,12 +97,12 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 97 |
page.screenshot(path=os.path.join(screenshot_dir, f"WARN_http_status_{response.status}_{diagnostic_tag}.png"))
|
| 98 |
else:
|
| 99 |
# 对于非http/https的导航(如 about:blank),response可能为None
|
| 100 |
-
logger.warning("page.goto 未返回响应对象,可能是一个非HTTP
|
| 101 |
|
| 102 |
except TimeoutError:
|
| 103 |
# 这是最常见的错误:超时
|
| 104 |
-
logger.error(f"导航到 {expected_url} 超时 (超过
|
| 105 |
-
logger.error("
|
| 106 |
# 尝试保存诊断信息
|
| 107 |
try:
|
| 108 |
# 截图对于看到页面卡在什么状态非常有帮助(例如,空白页、加载中、Chrome错误页)
|
|
@@ -122,16 +122,16 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 122 |
except PlaywrightError as e:
|
| 123 |
# 捕获其他Playwright相关的网络错误,例如DNS解析失败、连接被拒绝等
|
| 124 |
error_message = str(e)
|
| 125 |
-
logger.error(f"导航到 {expected_url} 时发生 Playwright
|
| 126 |
logger.error(f"错误详情: {error_message}")
|
| 127 |
|
| 128 |
# Playwright的错误信息通常很具体,例如 "net::ERR_CONNECTION_REFUSED"
|
| 129 |
if "net::ERR_NAME_NOT_RESOLVED" in error_message:
|
| 130 |
-
logger.error("排查建议:检查DNS
|
| 131 |
elif "net::ERR_CONNECTION_REFUSED" in error_message:
|
| 132 |
-
logger.error("
|
| 133 |
elif "net::ERR_INTERNET_DISCONNECTED" in error_message:
|
| 134 |
-
logger.error("
|
| 135 |
|
| 136 |
# 同样,尝试截图,尽管此时页面可能完全无法访问
|
| 137 |
try:
|
|
@@ -148,11 +148,11 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 148 |
page.wait_for_timeout(2000)
|
| 149 |
|
| 150 |
final_url = page.url
|
| 151 |
-
logger.info(f"导航完成。最终URL为: {final_url}")
|
| 152 |
|
| 153 |
# ... 你原有的URL检查逻辑保持不变 ...
|
| 154 |
if "accounts.google.com/v3/signin/identifier" in final_url:
|
| 155 |
-
logger.error("检测到Google登录页面(需要输入邮箱)。Cookie
|
| 156 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_identifier_page_{diagnostic_tag}.png"))
|
| 157 |
return
|
| 158 |
|
|
@@ -161,7 +161,7 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 161 |
final_path = extract_url_path(final_url)
|
| 162 |
|
| 163 |
if expected_path and expected_path in final_path:
|
| 164 |
-
logger.info(f"URL验证通过。预期路径: {expected_path}")
|
| 165 |
|
| 166 |
# --- 新的健壮策略:等待加载指示器消失 ---
|
| 167 |
# 这是解决竞态条件的关键。错误消息或内容只在初始加载完成后才会出现。
|
|
@@ -170,9 +170,9 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 170 |
logger.info("正在等待加载指示器 (spinner) 消失... (最长等待30秒)")
|
| 171 |
# 我们等待spinner变为'隐藏'状态或从DOM中消失。
|
| 172 |
spinner_locator.wait_for(state='hidden', timeout=30000)
|
| 173 |
-
logger.info("
|
| 174 |
except TimeoutError:
|
| 175 |
-
logger.error("页面加载指示器在30
|
| 176 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_spinner_stuck_{diagnostic_tag}.png"))
|
| 177 |
return # 如果页面加载卡住则退出
|
| 178 |
|
|
@@ -183,7 +183,7 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 183 |
|
| 184 |
# 这里我们只需要很短的超时时间,因为页面应该是稳定的。
|
| 185 |
if auth_error_locator.is_visible(timeout=2000):
|
| 186 |
-
logger.error(f"检测到认证失败的错误横幅: '{auth_error_text}'. Cookie
|
| 187 |
screenshot_path = os.path.join(screenshot_dir, f"FAIL_auth_error_banner_{diagnostic_tag}.png")
|
| 188 |
page.screenshot(path=screenshot_path)
|
| 189 |
|
|
@@ -194,31 +194,28 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 194 |
return # 明确的失败,因此我们退出。
|
| 195 |
|
| 196 |
# --- 如果没有错误,进行最终确认(作为后备方案) ---
|
| 197 |
-
logger.info("
|
| 198 |
login_button_cn = page.get_by_role('button', name='登录')
|
| 199 |
login_button_en = page.get_by_role('button', name='Login')
|
| 200 |
|
| 201 |
if login_button_cn.is_visible(timeout=1000) or login_button_en.is_visible(timeout=1000):
|
| 202 |
-
logger.error("页面上仍显示'登录'按钮。Cookie
|
| 203 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_login_button_visible_{diagnostic_tag}.png"))
|
| 204 |
return
|
| 205 |
|
| 206 |
# --- 如果所有检查都通过,我们假设成功 ---
|
| 207 |
-
logger.info("
|
| 208 |
-
|
| 209 |
-
# 创建Cookie验证器(验证将在主线程中执行,避免线程问题)
|
| 210 |
-
logger.info("Cookie验证器已创建,将定期验证Cookie有效性")
|
| 211 |
|
| 212 |
handle_successful_navigation(page, logger, diagnostic_tag, shutdown_event, cookie_validator)
|
| 213 |
elif "accounts.google.com/v3/signin/accountchooser" in final_url:
|
| 214 |
-
logger.warning("检测到Google账户选择页面。登录失败或Cookie
|
| 215 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
|
| 216 |
return
|
| 217 |
else:
|
| 218 |
-
logger.error(f"导航到了意外的URL
|
| 219 |
-
logger.error(f" 预期路径: {expected_path}")
|
| 220 |
-
logger.error(f"
|
| 221 |
-
logger.error(f"
|
| 222 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_unexpected_url_{diagnostic_tag}.png"))
|
| 223 |
return
|
| 224 |
|
|
|
|
| 8 |
from camoufox.sync_api import Camoufox
|
| 9 |
from utils.paths import logs_dir
|
| 10 |
from utils.common import parse_headless_mode, ensure_dir
|
| 11 |
+
from utils.url_helper import extract_url_path, mask_url_for_logging, mask_path_for_logging
|
| 12 |
|
| 13 |
|
| 14 |
def run_browser_instance(config, shutdown_event=None):
|
|
|
|
| 84 |
|
| 85 |
response = None
|
| 86 |
try:
|
| 87 |
+
logger.info(f"正在导航到: {mask_url_for_logging(expected_url)} (超时设置为 90 秒)")
|
| 88 |
# page.goto() 会返回一个 response 对象,我们可以用它来获取状态码等信息
|
| 89 |
response = page.goto(expected_url, wait_until='domcontentloaded', timeout=90000)
|
| 90 |
|
|
|
|
| 97 |
page.screenshot(path=os.path.join(screenshot_dir, f"WARN_http_status_{response.status}_{diagnostic_tag}.png"))
|
| 98 |
else:
|
| 99 |
# 对于非http/https的导航(如 about:blank),response可能为None
|
| 100 |
+
logger.warning("page.goto 未返回响应对象,可能是一个非HTTP导航")
|
| 101 |
|
| 102 |
except TimeoutError:
|
| 103 |
# 这是最常见的错误:超时
|
| 104 |
+
logger.error(f"导航到 {mask_url_for_logging(expected_url)} 超时 (超过90秒)")
|
| 105 |
+
logger.error("可能原因:网络连接缓慢、目标网站服务器无响应、代理问题、或页面资源被阻塞")
|
| 106 |
# 尝试保存诊断信息
|
| 107 |
try:
|
| 108 |
# 截图对于看到页面卡在什么状态非常有帮助(例如,空白页、加载中、Chrome错误页)
|
|
|
|
| 122 |
except PlaywrightError as e:
|
| 123 |
# 捕获其他Playwright相关的网络错误,例如DNS解析失败、连接被拒绝等
|
| 124 |
error_message = str(e)
|
| 125 |
+
logger.error(f"导航到 {mask_url_for_logging(expected_url)} 时发生 Playwright 网络错误")
|
| 126 |
logger.error(f"错误详情: {error_message}")
|
| 127 |
|
| 128 |
# Playwright的错误信息通常很具体,例如 "net::ERR_CONNECTION_REFUSED"
|
| 129 |
if "net::ERR_NAME_NOT_RESOLVED" in error_message:
|
| 130 |
+
logger.error("排查建议:检查DNS设置或域名是否正确")
|
| 131 |
elif "net::ERR_CONNECTION_REFUSED" in error_message:
|
| 132 |
+
logger.error("排查建议:目标服务器可能已关闭,或代理/防火墙阻止了连接")
|
| 133 |
elif "net::ERR_INTERNET_DISCONNECTED" in error_message:
|
| 134 |
+
logger.error("排查建议:检查本机的网络连接")
|
| 135 |
|
| 136 |
# 同样,尝试截图,尽管此时页面可能完全无法访问
|
| 137 |
try:
|
|
|
|
| 148 |
page.wait_for_timeout(2000)
|
| 149 |
|
| 150 |
final_url = page.url
|
| 151 |
+
logger.info(f"导航完成。最终URL为: {mask_url_for_logging(final_url)}")
|
| 152 |
|
| 153 |
# ... 你原有的URL检查逻辑保持不变 ...
|
| 154 |
if "accounts.google.com/v3/signin/identifier" in final_url:
|
| 155 |
+
logger.error("检测到Google登录页面(需要输入邮箱)。Cookie已完全失效")
|
| 156 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_identifier_page_{diagnostic_tag}.png"))
|
| 157 |
return
|
| 158 |
|
|
|
|
| 161 |
final_path = extract_url_path(final_url)
|
| 162 |
|
| 163 |
if expected_path and expected_path in final_path:
|
| 164 |
+
logger.info(f"URL验证通过。预期路径: {mask_path_for_logging(expected_path)}")
|
| 165 |
|
| 166 |
# --- 新的健壮策略:等待加载指示器消失 ---
|
| 167 |
# 这是解决竞态条件的关键。错误消息或内容只在初始加载完成后才会出现。
|
|
|
|
| 170 |
logger.info("正在等待加载指示器 (spinner) 消失... (最长等待30秒)")
|
| 171 |
# 我们等待spinner变为'隐藏'状态或从DOM中消失。
|
| 172 |
spinner_locator.wait_for(state='hidden', timeout=30000)
|
| 173 |
+
logger.info("加载指示器已消失。页面已完成异步加载")
|
| 174 |
except TimeoutError:
|
| 175 |
+
logger.error("页面加载指示器在30秒内未消失。页面可能已卡住")
|
| 176 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_spinner_stuck_{diagnostic_tag}.png"))
|
| 177 |
return # 如果页面加载卡住则退出
|
| 178 |
|
|
|
|
| 183 |
|
| 184 |
# 这里我们只需要很短的超时时间,因为页面应该是稳定的。
|
| 185 |
if auth_error_locator.is_visible(timeout=2000):
|
| 186 |
+
logger.error(f"检测到认证失败的错误横幅: '{auth_error_text}'. Cookie已过期或无效")
|
| 187 |
screenshot_path = os.path.join(screenshot_dir, f"FAIL_auth_error_banner_{diagnostic_tag}.png")
|
| 188 |
page.screenshot(path=screenshot_path)
|
| 189 |
|
|
|
|
| 194 |
return # 明确的失败,因此我们退出。
|
| 195 |
|
| 196 |
# --- 如果没有错误,进行最终确认(作为后备方案) ---
|
| 197 |
+
logger.info("未检测到认证错误横幅。进行最终确认")
|
| 198 |
login_button_cn = page.get_by_role('button', name='登录')
|
| 199 |
login_button_en = page.get_by_role('button', name='Login')
|
| 200 |
|
| 201 |
if login_button_cn.is_visible(timeout=1000) or login_button_en.is_visible(timeout=1000):
|
| 202 |
+
logger.error("页面上仍显示'登录'按钮。Cookie无效")
|
| 203 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_login_button_visible_{diagnostic_tag}.png"))
|
| 204 |
return
|
| 205 |
|
| 206 |
# --- 如果所有检查都通过,我们假设成功 ---
|
| 207 |
+
logger.info("所有验证通过,确认已成功登录")
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
handle_successful_navigation(page, logger, diagnostic_tag, shutdown_event, cookie_validator)
|
| 210 |
elif "accounts.google.com/v3/signin/accountchooser" in final_url:
|
| 211 |
+
logger.warning("检测到Google账户选择页面。登录失败或Cookie已过期")
|
| 212 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
|
| 213 |
return
|
| 214 |
else:
|
| 215 |
+
logger.error(f"导航到了意外的URL")
|
| 216 |
+
logger.error(f" 预期路径: {mask_path_for_logging(expected_path)}")
|
| 217 |
+
logger.error(f" 最终路径: {mask_path_for_logging(final_path)}")
|
| 218 |
+
logger.error(f" 最终URL: {mask_url_for_logging(final_url)}")
|
| 219 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_unexpected_url_{diagnostic_tag}.png"))
|
| 220 |
return
|
| 221 |
|
browser/navigation.py
CHANGED
|
@@ -16,9 +16,9 @@ def handle_untrusted_dialog(page: Page, logger=None):
|
|
| 16 |
logger.info(f"检测到弹窗,正在点击 'OK' 按钮...")
|
| 17 |
|
| 18 |
ok_button_locator.click(force=True)
|
| 19 |
-
logger.info(f"'OK'
|
| 20 |
expect(ok_button_locator).to_be_hidden(timeout=1000)
|
| 21 |
-
logger.info(f"
|
| 22 |
else:
|
| 23 |
logger.info(f"在10秒内未检测到弹窗,继续执行...")
|
| 24 |
except Exception as e:
|
|
@@ -28,13 +28,16 @@ def handle_successful_navigation(page: Page, logger, cookie_file_config, shutdow
|
|
| 28 |
"""
|
| 29 |
在成功导航到目标页面后,执行后续操作(处理弹窗、保持运行)。
|
| 30 |
"""
|
| 31 |
-
logger.info("
|
| 32 |
page.click('body') # 给予页面焦点
|
| 33 |
|
| 34 |
# 检查并处理 "Last modified by..." 的弹窗
|
| 35 |
handle_untrusted_dialog(page, logger=logger)
|
| 36 |
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# 等待页面加载和渲染
|
| 40 |
time.sleep(15)
|
|
|
|
| 16 |
logger.info(f"检测到弹窗,正在点击 'OK' 按钮...")
|
| 17 |
|
| 18 |
ok_button_locator.click(force=True)
|
| 19 |
+
logger.info(f"'OK' 按钮已点击")
|
| 20 |
expect(ok_button_locator).to_be_hidden(timeout=1000)
|
| 21 |
+
logger.info(f"弹窗已确认关闭")
|
| 22 |
else:
|
| 23 |
logger.info(f"在10秒内未检测到弹窗,继续执行...")
|
| 24 |
except Exception as e:
|
|
|
|
| 28 |
"""
|
| 29 |
在成功导航到目标页面后,执行后续操作(处理弹窗、保持运行)。
|
| 30 |
"""
|
| 31 |
+
logger.info("已成功到达目标页面")
|
| 32 |
page.click('body') # 给予页面焦点
|
| 33 |
|
| 34 |
# 检查并处理 "Last modified by..." 的弹窗
|
| 35 |
handle_untrusted_dialog(page, logger=logger)
|
| 36 |
|
| 37 |
+
if cookie_validator:
|
| 38 |
+
logger.info("Cookie验证器已创建,将定期验证Cookie有效性")
|
| 39 |
+
|
| 40 |
+
logger.info("实例将保持运行状态。每10秒点击一次页面以保持活动")
|
| 41 |
|
| 42 |
# 等待页面加载和渲染
|
| 43 |
time.sleep(15)
|
main.py
CHANGED
|
@@ -185,7 +185,7 @@ def load_instance_configurations(logger):
|
|
| 185 |
# 1. 读取所有实例共享的URL
|
| 186 |
shared_url = clean_env_value(os.getenv("CAMOUFOX_INSTANCE_URL"))
|
| 187 |
if not shared_url:
|
| 188 |
-
logger.error("错误: 缺少环境变量 CAMOUFOX_INSTANCE_URL。所有实例需要一个共享的目标URL
|
| 189 |
return None, None
|
| 190 |
|
| 191 |
# 2. 读取全局设置
|
|
@@ -204,7 +204,7 @@ def load_instance_configurations(logger):
|
|
| 204 |
|
| 205 |
# 检查是否有任何Cookie来源
|
| 206 |
if not sources:
|
| 207 |
-
logger.error("错误: 未找到任何Cookie来源(既没有JSON文件,也没有环境变量Cookie
|
| 208 |
return None, None
|
| 209 |
|
| 210 |
# 4. 为每个Cookie来源创建实例配置
|
|
@@ -240,7 +240,7 @@ def start_browser_instances(run_mode="standalone"):
|
|
| 240 |
|
| 241 |
global_settings, instance_profiles = load_instance_configurations(logger)
|
| 242 |
if not instance_profiles:
|
| 243 |
-
logger.error("错误:
|
| 244 |
return
|
| 245 |
|
| 246 |
for i, profile in enumerate(instance_profiles, 1):
|
|
@@ -419,7 +419,7 @@ def signal_handler(signum, frame):
|
|
| 419 |
except Exception as e:
|
| 420 |
logger.error(f"调用 terminate_all 时发生错误: {e}")
|
| 421 |
|
| 422 |
-
logger.info("
|
| 423 |
sys.exit(0)
|
| 424 |
|
| 425 |
def main():
|
|
|
|
| 185 |
# 1. 读取所有实例共享的URL
|
| 186 |
shared_url = clean_env_value(os.getenv("CAMOUFOX_INSTANCE_URL"))
|
| 187 |
if not shared_url:
|
| 188 |
+
logger.error("错误: 缺少环境变量 CAMOUFOX_INSTANCE_URL。所有实例需要一个共享的目标URL")
|
| 189 |
return None, None
|
| 190 |
|
| 191 |
# 2. 读取全局设置
|
|
|
|
| 204 |
|
| 205 |
# 检查是否有任何Cookie来源
|
| 206 |
if not sources:
|
| 207 |
+
logger.error("错误: 未找到任何Cookie来源(既没有JSON文件,也没有环境变量Cookie)")
|
| 208 |
return None, None
|
| 209 |
|
| 210 |
# 4. 为每个Cookie来源创建实例配置
|
|
|
|
| 240 |
|
| 241 |
global_settings, instance_profiles = load_instance_configurations(logger)
|
| 242 |
if not instance_profiles:
|
| 243 |
+
logger.error("错误: 环境变量中未找到任何实例配置")
|
| 244 |
return
|
| 245 |
|
| 246 |
for i, profile in enumerate(instance_profiles, 1):
|
|
|
|
| 419 |
except Exception as e:
|
| 420 |
logger.error(f"调用 terminate_all 时发生错误: {e}")
|
| 421 |
|
| 422 |
+
logger.info("应用关闭流程结束,主进程退出")
|
| 423 |
sys.exit(0)
|
| 424 |
|
| 425 |
def main():
|
utils/url_helper.py
CHANGED
|
@@ -44,3 +44,109 @@ def extract_url_path(url: str) -> str:
|
|
| 44 |
except Exception:
|
| 45 |
# 如果URL格式无效,返回空字符串
|
| 46 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
except Exception:
|
| 45 |
# 如果URL格式无效,返回空字符串
|
| 46 |
return ""
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def mask_path_for_logging(path: str) -> str:
    """Return *path* with the drive ID masked so it is safe to log.

    Masking rules:
        1. For ``/apps/drive/<ID>`` paths whose ID is longer than 8
           characters, keep the first 4 and last 4 characters of the ID and
           replace the middle with ``***``. Anything after the ID segment is
           kept as-is.
        2. Every other path (including short IDs) is returned unchanged.

    Args:
        path: URL path component.

    Returns:
        The masked path, or ``""`` when *path* is empty/falsy.

    Examples:
        >>> mask_path_for_logging("/apps/drive/abcdef123456")
        '/apps/drive/abcd***3456'

        >>> mask_path_for_logging("/apps/drive/xyz789")
        '/apps/drive/xyz789'

        >>> mask_path_for_logging("/other/path")
        '/other/path'
    """
    drive_prefix = '/apps/drive/'

    if not path:
        return ""
    if not path.startswith(drive_prefix):
        return path

    # Everything up to the next '/' is the ID; the remainder (if any) is
    # carried over untouched, separator included.
    identifier, slash, remainder = path[len(drive_prefix):].partition('/')

    # IDs of 8 characters or fewer would be fully exposed by a 4+4 mask,
    # so they are left alone.
    if len(identifier) <= 8:
        return path

    return f"{drive_prefix}{identifier[:4]}***{identifier[-4:]}{slash}{remainder}"
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def mask_url_for_logging(url: str) -> str:
    """Return *url* with the drive ID in its path masked so it is safe to log.

    Masking rules:
        1. When the URL path starts with ``/apps/drive/<ID>`` and the ID is
           longer than 8 characters, keep the first 4 and last 4 characters
           of the ID and replace the middle with ``***``.
        2. Every other URL (including short IDs) is returned unchanged.

    All other URL components (scheme, host, params, query, fragment) are
    carried over as parsed. The URL is rebuilt from the parsed result rather
    than by string concatenation, so relative or scheme-less inputs such as
    ``/apps/drive/<ID>`` or ``//host/apps/drive/<ID>`` do not gain a bogus
    ``://`` prefix.

    Args:
        url: Full URL string.

    Returns:
        The masked URL, ``""`` when *url* is empty/falsy, or the input
        unchanged when it cannot be parsed.

    Examples:
        >>> mask_url_for_logging("https://ai.studio/apps/drive/abcdef123456")
        'https://ai.studio/apps/drive/abcd***3456'

        >>> mask_url_for_logging("https://aistudio.google.com/apps/drive/xyz789")
        'https://aistudio.google.com/apps/drive/xyz789'

        >>> mask_url_for_logging("https://example.com/other/path")
        'https://example.com/other/path'
    """
    if not url:
        return ""

    drive_prefix = '/apps/drive/'

    try:
        parsed = urlparse(url)

        if not parsed.path.startswith(drive_prefix):
            return url

        # Split off the ID segment; anything after it is kept untouched.
        drive_id, sep, tail = parsed.path[len(drive_prefix):].partition('/')

        # IDs of 8 characters or fewer are left alone (a 4+4 mask would
        # expose them entirely anyway).
        if len(drive_id) <= 8:
            return url

        masked_path = f"{drive_prefix}{drive_id[:4]}***{drive_id[-4:]}{sep}{tail}"

        # Let urllib reassemble the URL so empty scheme/netloc and
        # ';params' are handled correctly.
        return parsed._replace(path=masked_path).geturl()

    except Exception:
        # Deliberately best-effort: a logging helper must never raise, so
        # on any parse failure fall back to the original value.
        return url
|