File size: 8,927 Bytes
8e02bdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
import { chromium, Browser, BrowserContext, Page } from "playwright-core";
import fs from "fs";
import logger from "@/lib/logger.ts";
import { getCookiesForBrowser } from "@/api/controllers/core.ts";
import provider from "@/lib/upstream-provider.ts";

// bdms SDK 相关脚本的白名单域名
const SCRIPT_WHITELIST_DOMAINS = provider.browserScriptWhitelistDomains;

// 需要屏蔽的资源类型(加速加载、减少内存)
const BLOCKED_RESOURCE_TYPES = ["image", "font", "stylesheet", "media"];

// 会话空闲超时时间(毫秒)
const SESSION_IDLE_TIMEOUT = 10 * 60 * 1000;

// bdms SDK 就绪等待超时(毫秒)
const BDMS_READY_TIMEOUT = 30000;

interface BrowserSession {
  context: BrowserContext;
  page: Page;
  lastUsed: number;
  idleTimer: NodeJS.Timeout | null;
}

class BrowserService {
  private browser: Browser | null = null;
  private sessions: Map<string, BrowserSession> = new Map();
  private launching: Promise<Browser> | null = null;

  private findExistingBrowserPath(): string | undefined {
    const candidates = [
      process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH,
      "C:/Program Files/Google/Chrome/Application/chrome.exe",
      "C:/Program Files/Google/Chrome/Application/chrome.exe",
      "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe",
      "C:/Program Files/Microsoft/Edge/Application/msedge.exe",
      "C:/Program Files (x86)/Microsoft/Edge/Application/msedge.exe",
    ].filter(Boolean) as string[];

    return candidates.find((candidate) => fs.existsSync(candidate));
  }

  /**
   * 懒启动浏览器实例
   */
  private async ensureBrowser(): Promise<Browser> {
    if (this.browser?.isConnected()) {
      return this.browser;
    }

    // 防止并发启动
    if (this.launching) {
      return this.launching;
    }

    this.launching = (async () => {
      logger.info("BrowserService: 正在启动 Chromium 浏览器...");
      try {
        const executablePath = this.findExistingBrowserPath() || "C:/Program Files/Google/Chrome/Application/chrome.exe";
        this.browser = await chromium.launch({
          headless: true,
          executablePath,
          args: [
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            "--no-first-run",
            "--no-zygote",
            "--single-process",
          ],
        });

        this.browser.on("disconnected", () => {
          logger.warn("BrowserService: 浏览器已断开连接");
          this.browser = null;
          this.sessions.clear();
        });

        logger.info("BrowserService: Chromium 浏览器启动成功");
        return this.browser;
      } finally {
        this.launching = null;
      }
    })();

    return this.launching;
  }

  /**
   * 获取或创建指定 token 的浏览器会话
   */
  private async getSession(token: string): Promise<BrowserSession> {
    const existing = this.sessions.get(token);
    if (existing) {
      existing.lastUsed = Date.now();
      // 重置空闲计时器
      if (existing.idleTimer) {
        clearTimeout(existing.idleTimer);
      }
      existing.idleTimer = setTimeout(() => this.closeSession(token), SESSION_IDLE_TIMEOUT);
      return existing;
    }

    return this.createSession(token);
  }

  /**
   * 创建新的浏览器会话
   */
  private async createSession(token: string): Promise<BrowserSession> {
    const browser = await this.ensureBrowser();

    logger.info(`BrowserService: 为 token ${token.substring(0, 8)}... 创建新会话`);

    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
      viewport: { width: 1920, height: 1080 },
      locale: provider.browserLocale,
    });

    // 注入 cookies
    const cookies = getCookiesForBrowser(token);
    await context.addCookies(cookies);

    // 配置资源拦截
    await context.route("**/*", (route) => {
      const request = route.request();
      const resourceType = request.resourceType();
      const url = request.url();

      // 屏蔽不需要的资源类型
      if (BLOCKED_RESOURCE_TYPES.includes(resourceType)) {
        return route.abort();
      }

      // 对于脚本资源,只允许白名单域名
      if (resourceType === "script") {
        const isWhitelisted = SCRIPT_WHITELIST_DOMAINS.some((domain) =>
          url.includes(domain)
        );
        if (!isWhitelisted) {
          return route.abort();
        }
      }

      return route.continue();
    });

    const page = await context.newPage();

    // 导航到即梦页面,让 bdms SDK 加载
    logger.info(`BrowserService: 正在导航到 ${provider.browserEntryUrl} ...`);
    await page.goto(provider.browserEntryUrl, {
      waitUntil: "domcontentloaded",
      timeout: 30000,
    });

    // 等待 bdms SDK 就绪
    logger.info("BrowserService: 等待 bdms SDK 就绪...");
    try {
      await page.waitForFunction(
        () => {
          // bdms SDK 会替换 window.fetch,检测其是否被替换
          // 也可以检测 window.bdms 或 window.byted_acrawler
          return (
            (window as any).bdms?.init ||
            (window as any).byted_acrawler ||
            // 检测 fetch 是否被替换(bdms 会替换原生 fetch)
            window.fetch.toString().indexOf("native code") === -1
          );
        },
        { timeout: BDMS_READY_TIMEOUT }
      );
      logger.info("BrowserService: bdms SDK 已就绪");
    } catch (err) {
      logger.warn(
        "BrowserService: bdms SDK 等待超时,可能未完全加载,继续尝试..."
      );
    }

    const session: BrowserSession = {
      context,
      page,
      lastUsed: Date.now(),
      idleTimer: setTimeout(() => this.closeSession(token), SESSION_IDLE_TIMEOUT),
    };

    this.sessions.set(token, session);
    return session;
  }

  /**
   * 关闭指定 token 的会话
   */
  private async closeSession(token: string) {
    const session = this.sessions.get(token);
    if (!session) return;

    logger.info(`BrowserService: 关闭空闲会话 ${token.substring(0, 8)}...`);
    if (session.idleTimer) {
      clearTimeout(session.idleTimer);
    }

    try {
      await session.context.close();
    } catch (err) {
      // 忽略关闭错误
    }

    this.sessions.delete(token);
  }

  /**
   * 通过浏览器代理发送 fetch 请求
   * bdms SDK 会自动拦截 fetch 并注入 a_bogus 签名
   *
   * @param token sessionid
   * @param url 完整的请求 URL
   * @param options fetch 选项 (method, headers, body)
   * @returns 解析后的 JSON 响应
   */
  async fetch(
    token: string,
    url: string,
    options: { method?: string; headers?: Record<string, string>; body?: string }
  ): Promise<any> {
    const session = await this.getSession(token);

    logger.info(`BrowserService: 代理请求 ${options.method || "GET"} ${url.substring(0, 100)}...`);

    try {
      const result = await session.page.evaluate(
        async ({ url, options }) => {
          try {
            const res = await fetch(url, {
              method: options.method || "GET",
              headers: {
                "Content-Type": "application/json",
                ...(options.headers || {}),
              },
              body: options.body,
              credentials: "include",
            });
            const text = await res.text();
            return { ok: res.ok, status: res.status, text };
          } catch (err: any) {
            return { ok: false, status: 0, text: "", error: err.message };
          }
        },
        { url, options }
      );

      if (result.error) {
        throw new Error(`浏览器 fetch 失败: ${result.error}`);
      }

      logger.info(`BrowserService: 响应状态 ${result.status}`);

      try {
        return JSON.parse(result.text);
      } catch {
        logger.warn(`BrowserService: 响应不是有效 JSON: ${result.text.substring(0, 200)}`);
        return result.text;
      }
    } catch (err) {
      // 如果执行失败(页面崩溃等),清理会话以便下次重建
      logger.error(`BrowserService: 请求执行失败: ${(err as Error).message}`);
      await this.closeSession(token);
      throw err;
    }
  }

  /**
   * 关闭所有会话和浏览器实例
   */
  async close() {
    logger.info("BrowserService: 正在关闭所有会话和浏览器...");

    for (const [token] of this.sessions) {
      await this.closeSession(token);
    }

    if (this.browser) {
      try {
        await this.browser.close();
      } catch (err) {
        // 忽略关闭错误
      }
      this.browser = null;
    }

    logger.info("BrowserService: 已关闭");
  }
}

// 单例导出
const browserService = new BrowserService();
export default browserService;