// Snapshot of revision 8e02bdb (file size: 8,927 bytes).
import { chromium, Browser, BrowserContext, Page } from "playwright-core";
import fs from "fs";
import logger from "@/lib/logger.ts";
import { getCookiesForBrowser } from "@/api/controllers/core.ts";
import provider from "@/lib/upstream-provider.ts";
// Allow-list of domains for script requests (bdms SDK related scripts).
// The request interceptor aborts any script whose URL contains none of these entries.
const SCRIPT_WHITELIST_DOMAINS = provider.browserScriptWhitelistDomains;
// Resource types the request interceptor aborts outright (faster loads, lower memory use).
const BLOCKED_RESOURCE_TYPES: string[] = ["image", "font", "stylesheet", "media"];
// How long a session may sit unused before it is closed, in milliseconds (10 minutes).
const SESSION_IDLE_TIMEOUT: number = 600_000;
// Upper bound on waiting for the bdms SDK to become ready, in milliseconds.
const BDMS_READY_TIMEOUT: number = 30_000;
/**
 * Per-token browser state kept by BrowserService.
 */
interface BrowserSession {
/** Browser context created for the token; the token's cookies are added to it. */
context: BrowserContext;
/** Page opened in `context` and navigated to the provider entry URL; proxied fetches run inside it. */
page: Page;
/** `Date.now()` timestamp (ms) of the most recent use of this session. */
lastUsed: number;
/** Timer that closes the session after SESSION_IDLE_TIMEOUT, or null when none is scheduled. */
idleTimer: NodeJS.Timeout | null;
}
/**
 * Proxies HTTP requests through a headless Chromium page.
 *
 * One browser process is launched lazily and shared. Each token gets its own
 * browser context (with that token's cookies) and a page navigated to the
 * provider entry URL, so that requests issued from the page go through the
 * page's own `fetch` (which, per the original author's notes, the bdms SDK
 * wraps to add the `a_bogus` signature).
 *
 * Sessions are closed after SESSION_IDLE_TIMEOUT of inactivity and are torn
 * down whenever a proxied request fails, so the next call rebuilds them.
 */
class BrowserService {
  /** Shared browser process, or null when not launched / disconnected. */
  private browser: Browser | null = null;
  /** Ready sessions keyed by token. */
  private sessions: Map<string, BrowserSession> = new Map();
  /** In-flight browser launch, shared by concurrent callers. */
  private launching: Promise<Browser> | null = null;
  /** In-flight session creations keyed by token, shared by concurrent callers. */
  private creating: Map<string, Promise<BrowserSession>> = new Map();

  /**
   * Returns the first browser executable that exists on disk, checking the
   * PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH environment variable first and then
   * the usual Windows install locations of Chrome and Edge.
   *
   * @returns an existing executable path, or undefined when none was found
   */
  private findExistingBrowserPath(): string | undefined {
    const candidates = [
      process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH,
      "C:/Program Files/Google/Chrome/Application/chrome.exe",
      "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe",
      "C:/Program Files/Microsoft/Edge/Application/msedge.exe",
      "C:/Program Files (x86)/Microsoft/Edge/Application/msedge.exe",
    ].filter((candidate): candidate is string => Boolean(candidate));
    return candidates.find((candidate) => fs.existsSync(candidate));
  }

  /**
   * Lazily launches the shared browser.
   *
   * Concurrent callers share a single launch. When no known executable exists
   * on disk, no `executablePath` is passed at all so Playwright resolves its
   * own default browser instead of being handed a path known to be missing.
   *
   * @returns the connected browser instance
   * @throws whatever `chromium.launch` rejects with
   */
  private async ensureBrowser(): Promise<Browser> {
    if (this.browser?.isConnected()) {
      return this.browser;
    }
    // Share an in-flight launch instead of starting a second browser.
    if (this.launching) {
      return this.launching;
    }
    this.launching = (async () => {
      logger.info("BrowserService: 正在启动 Chromium 浏览器...");
      try {
        const executablePath = this.findExistingBrowserPath();
        const browser = await chromium.launch({
          headless: true,
          ...(executablePath ? { executablePath } : {}),
          args: [
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            "--no-first-run",
            "--no-zygote",
            "--single-process",
          ],
        });
        browser.on("disconnected", () => {
          logger.warn("BrowserService: 浏览器已断开连接");
          if (this.browser === browser) {
            this.browser = null;
          }
          // Cancel idle timers before dropping the sessions; a stale timer
          // would otherwise fire later and close a newer session that reuses
          // the same token.
          for (const session of this.sessions.values()) {
            if (session.idleTimer) {
              clearTimeout(session.idleTimer);
              session.idleTimer = null;
            }
          }
          this.sessions.clear();
        });
        this.browser = browser;
        logger.info("BrowserService: Chromium 浏览器启动成功");
        return browser;
      } finally {
        this.launching = null;
      }
    })();
    return this.launching;
  }

  /**
   * (Re)starts the idle timer of a session.
   *
   * When the timer fires, the session is closed only if it is still the one
   * registered for the token.
   */
  private armIdleTimer(token: string, session: BrowserSession): void {
    if (session.idleTimer) {
      clearTimeout(session.idleTimer);
    }
    session.idleTimer = setTimeout(() => {
      if (this.sessions.get(token) === session) {
        void this.closeSession(token);
      }
    }, SESSION_IDLE_TIMEOUT);
  }

  /**
   * Returns the session for a token, creating it when necessary.
   *
   * Reusing a session refreshes its `lastUsed` stamp and idle timer.
   * Concurrent calls for a token that has no session yet share one creation,
   * so only a single browser context is ever built per token.
   *
   * @throws when the session cannot be created
   */
  private async getSession(token: string): Promise<BrowserSession> {
    const existing = this.sessions.get(token);
    if (existing) {
      existing.lastUsed = Date.now();
      this.armIdleTimer(token, existing);
      return existing;
    }
    const pending = this.creating.get(token);
    if (pending) {
      return pending;
    }
    const creation = (async () => {
      try {
        return await this.createSession(token);
      } finally {
        this.creating.delete(token);
      }
    })();
    this.creating.set(token, creation);
    return creation;
  }

  /**
   * Builds a new session: context, cookies, request filtering, page
   * navigation and the bdms readiness wait.
   *
   * If any step after the context is created fails, the context is closed
   * before the error is rethrown so it is not leaked.
   *
   * @throws when the browser, context or navigation fails
   */
  private async createSession(token: string): Promise<BrowserSession> {
    const browser = await this.ensureBrowser();
    logger.info(`BrowserService: 为 token ${token.substring(0, 8)}... 创建新会话`);
    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
      viewport: { width: 1920, height: 1080 },
      locale: provider.browserLocale,
    });
    try {
      // Inject the token's cookies.
      const cookies = getCookiesForBrowser(token);
      await context.addCookies(cookies);
      // Filter requests: drop heavy resource types and non-allow-listed scripts.
      await context.route("**/*", (route) => {
        const request = route.request();
        const resourceType = request.resourceType();
        const url = request.url();
        if (BLOCKED_RESOURCE_TYPES.includes(resourceType)) {
          return route.abort();
        }
        if (resourceType === "script") {
          // NOTE(review): this is a substring match against the whole URL, so
          // an entry can also match inside a path or query string. Consider a
          // hostname comparison once the entry format is confirmed.
          const isWhitelisted = SCRIPT_WHITELIST_DOMAINS.some((domain) =>
            url.includes(domain)
          );
          if (!isWhitelisted) {
            return route.abort();
          }
        }
        return route.continue();
      });
      const page = await context.newPage();
      // Navigate to the entry page so the bdms SDK gets loaded.
      logger.info(`BrowserService: 正在导航到 ${provider.browserEntryUrl} ...`);
      await page.goto(provider.browserEntryUrl, {
        waitUntil: "domcontentloaded",
        timeout: 30000,
      });
      await this.waitForBdmsReady(page);
      const session: BrowserSession = {
        context,
        page,
        lastUsed: Date.now(),
        idleTimer: null,
      };
      this.armIdleTimer(token, session);
      this.sessions.set(token, session);
      return session;
    } catch (err) {
      // Best-effort cleanup of the half-built context; the original error is
      // the one worth reporting.
      try {
        await context.close();
      } catch {
        // The context may already be gone (e.g. the browser disconnected).
      }
      throw err;
    }
  }

  /**
   * Waits up to BDMS_READY_TIMEOUT for signs that the bdms SDK is loaded:
   * `window.bdms.init`, `window.byted_acrawler`, or a `window.fetch` whose
   * source no longer contains "native code". Failure to detect it is logged
   * and otherwise ignored, so the session is still created.
   */
  private async waitForBdmsReady(page: Page): Promise<void> {
    logger.info("BrowserService: 等待 bdms SDK 就绪...");
    try {
      await page.waitForFunction(
        () => {
          // This callback runs inside the page, so it must be self-contained.
          const w = window as any;
          return (
            w.bdms?.init ||
            w.byted_acrawler ||
            window.fetch.toString().indexOf("native code") === -1
          );
        },
        { timeout: BDMS_READY_TIMEOUT }
      );
      logger.info("BrowserService: bdms SDK 已就绪");
    } catch {
      logger.warn(
        "BrowserService: bdms SDK 等待超时,可能未完全加载,继续尝试..."
      );
    }
  }

  /**
   * Closes and forgets the session of a token; a no-op when there is none.
   *
   * The session is unregistered before its context is closed, so callers
   * arriving during the close build a fresh session instead of receiving a
   * context that is being torn down. Errors from closing are ignored.
   */
  private async closeSession(token: string): Promise<void> {
    const session = this.sessions.get(token);
    if (!session) return;
    logger.info(`BrowserService: 关闭空闲会话 ${token.substring(0, 8)}...`);
    if (session.idleTimer) {
      clearTimeout(session.idleTimer);
      session.idleTimer = null;
    }
    this.sessions.delete(token);
    try {
      await session.context.close();
    } catch {
      // Deliberately ignored: closing is best-effort cleanup.
    }
  }

  /**
   * Sends a fetch request from inside the token's browser page.
   *
   * "Content-Type: application/json" is sent unless overridden by
   * `options.headers`, and credentials are always included. Per the original
   * author's notes, the bdms SDK intercepts the page's fetch and adds the
   * a_bogus signature.
   *
   * @param token sessionid identifying the browser session to use
   * @param url full request URL
   * @param options fetch options (method, headers, body); method defaults to GET
   * @returns the response body parsed as JSON, or the raw text when it is not valid JSON
   * @throws when the session cannot be created, the page-side fetch fails, or
   *         the page cannot run the request; the session is closed in the
   *         latter two cases so the next call rebuilds it
   */
  async fetch(
    token: string,
    url: string,
    options: { method?: string; headers?: Record<string, string>; body?: string } = {}
  ): Promise<any> {
    const session = await this.getSession(token);
    logger.info(`BrowserService: 代理请求 ${options.method || "GET"} ${url.substring(0, 100)}...`);
    try {
      const result = await session.page.evaluate(
        async ({
          url,
          options,
        }: {
          url: string;
          options: { method?: string; headers?: Record<string, string>; body?: string };
        }): Promise<{ ok: boolean; status: number; text: string; error: string | null }> => {
          // Runs inside the page: only the serialized argument is available.
          try {
            const res = await fetch(url, {
              method: options.method || "GET",
              headers: {
                "Content-Type": "application/json",
                ...(options.headers || {}),
              },
              body: options.body,
              credentials: "include",
            });
            const text = await res.text();
            return { ok: res.ok, status: res.status, text, error: null };
          } catch (err: unknown) {
            return {
              ok: false,
              status: 0,
              text: "",
              error: err instanceof Error ? err.message : String(err),
            };
          }
        },
        { url, options }
      );
      // Compare against null so a failure with an empty message still counts.
      if (result.error !== null) {
        throw new Error(`浏览器 fetch 失败: ${result.error}`);
      }
      logger.info(`BrowserService: 响应状态 ${result.status}`);
      try {
        return JSON.parse(result.text);
      } catch {
        logger.warn(`BrowserService: 响应不是有效 JSON: ${result.text.substring(0, 200)}`);
        return result.text;
      }
    } catch (err) {
      // Execution failed (page crash etc.): drop the session so the next call
      // rebuilds it, but only if the failing session is still the registered one.
      const message = err instanceof Error ? err.message : String(err);
      logger.error(`BrowserService: 请求执行失败: ${message}`);
      if (this.sessions.get(token) === session) {
        await this.closeSession(token);
      }
      throw err;
    }
  }

  /**
   * Closes every session and then the browser itself. Errors from closing
   * the browser are ignored.
   */
  async close(): Promise<void> {
    logger.info("BrowserService: 正在关闭所有会话和浏览器...");
    // Iterate over a snapshot because closeSession mutates the map.
    for (const token of [...this.sessions.keys()]) {
      await this.closeSession(token);
    }
    if (this.browser) {
      try {
        await this.browser.close();
      } catch {
        // Deliberately ignored: closing is best-effort cleanup.
      }
      this.browser = null;
    }
    logger.info("BrowserService: 已关闭");
  }
}
// Singleton export: the module creates one BrowserService instance and exports it as default.
const browserService = new BrowserService();
export default browserService;