nomagick committed on
Commit
9e02080
·
unverified ·
1 Parent(s): 55b954f

fix: error on browser crashes

Browse files
backend/functions/package-lock.json CHANGED
@@ -14,19 +14,19 @@
14
  "archiver": "^6.0.1",
15
  "axios": "^1.3.3",
16
  "bcrypt": "^5.1.0",
17
- "civkit": "^0.6.5-7a4ba56",
18
  "cors": "^2.8.5",
19
  "dayjs": "^1.11.9",
20
  "express": "^4.19.2",
21
  "firebase-admin": "^12.1.0",
22
- "firebase-functions": "^4.8.0",
23
  "generic-pool": "^3.9.0",
24
  "htmlparser2": "^9.0.0",
25
  "jose": "^5.1.0",
26
  "langdetect": "^0.2.1",
27
  "minio": "^7.1.3",
28
  "openai": "^4.20.0",
29
- "puppeteer": "^22.6.3",
30
  "puppeteer-extra": "^3.3.6",
31
  "puppeteer-extra-plugin-block-resources": "^2.4.3",
32
  "puppeteer-extra-plugin-page-proxy": "^2.0.0",
@@ -1963,9 +1963,9 @@
1963
  "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
1964
  },
1965
  "node_modules/@puppeteer/browsers": {
1966
- "version": "2.2.1",
1967
- "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.2.1.tgz",
1968
- "integrity": "sha512-QSXujx4d4ogDamQA8ckkkRieFzDgZEuZuGiey9G7CuDcbnX4iINKWxTPC5Br2AEzY9ICAvcndqgAUFMMKnS/Tw==",
1969
  "dependencies": {
1970
  "debug": "4.3.4",
1971
  "extract-zip": "2.0.1",
@@ -3645,9 +3645,9 @@
3645
  }
3646
  },
3647
  "node_modules/chromium-bidi": {
3648
- "version": "0.5.17",
3649
- "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.5.17.tgz",
3650
- "integrity": "sha512-BqOuIWUgTPj8ayuBFJUYCCuwIcwjBsb3/614P7tt1bEPJ4i1M0kCdIl0Wi9xhtswBXnfO2bTpTMkHD71H8rJMg==",
3651
  "dependencies": {
3652
  "mitt": "3.0.1",
3653
  "urlpattern-polyfill": "10.0.0",
@@ -3674,9 +3674,9 @@
3674
  }
3675
  },
3676
  "node_modules/civkit": {
3677
- "version": "0.6.5-7a4ba56",
3678
- "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.6.5-7a4ba56.tgz",
3679
- "integrity": "sha512-WAKnZn7DwuHkjEaH/bGXN4ZSYFvzM06ky1S9LjzHd1Ud+fMd3sEJR0b68BprzqXdeBNB5LyPHO4Gikf1z7J1bA==",
3680
  "dependencies": {
3681
  "lodash": "^4.17.21",
3682
  "tslib": "^2.5.0"
@@ -4284,9 +4284,9 @@
4284
  }
4285
  },
4286
  "node_modules/devtools-protocol": {
4287
- "version": "0.0.1262051",
4288
- "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1262051.tgz",
4289
- "integrity": "sha512-YJe4CT5SA8on3Spa+UDtNhEqtuV6Epwz3OZ4HQVLhlRccpZ9/PAYk0/cy/oKxFKRrZPBUPyxympQci4yWNWZ9g=="
4290
  },
4291
  "node_modules/diff-sequences": {
4292
  "version": "29.6.3",
@@ -9464,15 +9464,15 @@
9464
  }
9465
  },
9466
  "node_modules/puppeteer": {
9467
- "version": "22.6.4",
9468
- "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.6.4.tgz",
9469
- "integrity": "sha512-J9hXNwZmuqKDmNMj6kednZH8jzbdX9735NQfQJrq5LRD4nHisAMyW9pCD7glKi+iM7RV9JkesI1MYhdsN+0ZSQ==",
9470
  "hasInstallScript": true,
9471
  "dependencies": {
9472
- "@puppeteer/browsers": "2.2.1",
9473
  "cosmiconfig": "9.0.0",
9474
- "devtools-protocol": "0.0.1262051",
9475
- "puppeteer-core": "22.6.4"
9476
  },
9477
  "bin": {
9478
  "puppeteer": "lib/esm/puppeteer/node/cli.js"
@@ -9482,14 +9482,14 @@
9482
  }
9483
  },
9484
  "node_modules/puppeteer-core": {
9485
- "version": "22.6.4",
9486
- "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.6.4.tgz",
9487
- "integrity": "sha512-QtfJwPmqQec3EHc6LqbEz03vSiuVAr9bYp0TV87dLoreev6ZevsXdLgOfQgoA3GocrsSe/eUf7NRPQ1lQfsc3w==",
9488
  "dependencies": {
9489
- "@puppeteer/browsers": "2.2.1",
9490
- "chromium-bidi": "0.5.17",
9491
  "debug": "4.3.4",
9492
- "devtools-protocol": "0.0.1262051",
9493
  "ws": "8.16.0"
9494
  },
9495
  "engines": {
 
14
  "archiver": "^6.0.1",
15
  "axios": "^1.3.3",
16
  "bcrypt": "^5.1.0",
17
+ "civkit": "^0.6.5-047c0d8",
18
  "cors": "^2.8.5",
19
  "dayjs": "^1.11.9",
20
  "express": "^4.19.2",
21
  "firebase-admin": "^12.1.0",
22
+ "firebase-functions": "^4.9.0",
23
  "generic-pool": "^3.9.0",
24
  "htmlparser2": "^9.0.0",
25
  "jose": "^5.1.0",
26
  "langdetect": "^0.2.1",
27
  "minio": "^7.1.3",
28
  "openai": "^4.20.0",
29
+ "puppeteer": "^22.7.1",
30
  "puppeteer-extra": "^3.3.6",
31
  "puppeteer-extra-plugin-block-resources": "^2.4.3",
32
  "puppeteer-extra-plugin-page-proxy": "^2.0.0",
 
1963
  "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
1964
  },
1965
  "node_modules/@puppeteer/browsers": {
1966
+ "version": "2.2.3",
1967
+ "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.2.3.tgz",
1968
+ "integrity": "sha512-bJ0UBsk0ESOs6RFcLXOt99a3yTDcOKlzfjad+rhFwdaG1Lu/Wzq58GHYCDTlZ9z6mldf4g+NTb+TXEfe0PpnsQ==",
1969
  "dependencies": {
1970
  "debug": "4.3.4",
1971
  "extract-zip": "2.0.1",
 
3645
  }
3646
  },
3647
  "node_modules/chromium-bidi": {
3648
+ "version": "0.5.19",
3649
+ "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.5.19.tgz",
3650
+ "integrity": "sha512-UA6zL77b7RYCjJkZBsZ0wlvCTD+jTjllZ8f6wdO4buevXgTZYjV+XLB9CiEa2OuuTGGTLnI7eN9I60YxuALGQg==",
3651
  "dependencies": {
3652
  "mitt": "3.0.1",
3653
  "urlpattern-polyfill": "10.0.0",
 
3674
  }
3675
  },
3676
  "node_modules/civkit": {
3677
+ "version": "0.6.5-047c0d8",
3678
+ "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.6.5-047c0d8.tgz",
3679
+ "integrity": "sha512-4FWHrkJQHbTD3wjNeihxOzm7GSgQa9BUgSvPOLsfKybeEw9Pv+I94uDUP8PczL1TpHO6hIbIE2KJjzSOx6PYqg==",
3680
  "dependencies": {
3681
  "lodash": "^4.17.21",
3682
  "tslib": "^2.5.0"
 
4284
  }
4285
  },
4286
  "node_modules/devtools-protocol": {
4287
+ "version": "0.0.1273771",
4288
+ "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1273771.tgz",
4289
+ "integrity": "sha512-QDbb27xcTVReQQW/GHJsdQqGKwYBE7re7gxehj467kKP2DKuYBUj6i2k5LRiAC66J1yZG/9gsxooz/s9pcm0Og=="
4290
  },
4291
  "node_modules/diff-sequences": {
4292
  "version": "29.6.3",
 
9464
  }
9465
  },
9466
  "node_modules/puppeteer": {
9467
+ "version": "22.7.1",
9468
+ "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.7.1.tgz",
9469
+ "integrity": "sha512-JBCBCwQ9+dyPp5haqeecgv0N0vgWFx44woUeKJaPeJT8CU3RXrd8F/tqJQbuAmcWlbMhYJSlTJkIFrwVAs6BNA==",
9470
  "hasInstallScript": true,
9471
  "dependencies": {
9472
+ "@puppeteer/browsers": "2.2.3",
9473
  "cosmiconfig": "9.0.0",
9474
+ "devtools-protocol": "0.0.1273771",
9475
+ "puppeteer-core": "22.7.1"
9476
  },
9477
  "bin": {
9478
  "puppeteer": "lib/esm/puppeteer/node/cli.js"
 
9482
  }
9483
  },
9484
  "node_modules/puppeteer-core": {
9485
+ "version": "22.7.1",
9486
+ "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.7.1.tgz",
9487
+ "integrity": "sha512-jD7T7yN7PWGuJmNT0TAEboA26s0VVnvbgCxqgQIF+eNQW2u71ENaV2JwzSJiCHO+e72H4Ue6AgKD9USQ8xAcOQ==",
9488
  "dependencies": {
9489
+ "@puppeteer/browsers": "2.2.3",
9490
+ "chromium-bidi": "0.5.19",
9491
  "debug": "4.3.4",
9492
+ "devtools-protocol": "0.0.1273771",
9493
  "ws": "8.16.0"
9494
  },
9495
  "engines": {
backend/functions/package.json CHANGED
@@ -34,19 +34,19 @@
34
  "archiver": "^6.0.1",
35
  "axios": "^1.3.3",
36
  "bcrypt": "^5.1.0",
37
- "civkit": "^0.6.5-7a4ba56",
38
  "cors": "^2.8.5",
39
  "dayjs": "^1.11.9",
40
  "express": "^4.19.2",
41
  "firebase-admin": "^12.1.0",
42
- "firebase-functions": "^4.8.0",
43
  "generic-pool": "^3.9.0",
44
  "htmlparser2": "^9.0.0",
45
  "jose": "^5.1.0",
46
  "langdetect": "^0.2.1",
47
  "minio": "^7.1.3",
48
  "openai": "^4.20.0",
49
- "puppeteer": "^22.6.3",
50
  "puppeteer-extra": "^3.3.6",
51
  "puppeteer-extra-plugin-block-resources": "^2.4.3",
52
  "puppeteer-extra-plugin-page-proxy": "^2.0.0",
 
34
  "archiver": "^6.0.1",
35
  "axios": "^1.3.3",
36
  "bcrypt": "^5.1.0",
37
+ "civkit": "^0.6.5-047c0d8",
38
  "cors": "^2.8.5",
39
  "dayjs": "^1.11.9",
40
  "express": "^4.19.2",
41
  "firebase-admin": "^12.1.0",
42
+ "firebase-functions": "^4.9.0",
43
  "generic-pool": "^3.9.0",
44
  "htmlparser2": "^9.0.0",
45
  "jose": "^5.1.0",
46
  "langdetect": "^0.2.1",
47
  "minio": "^7.1.3",
48
  "openai": "^4.20.0",
49
+ "puppeteer": "^22.7.1",
50
  "puppeteer-extra": "^3.3.6",
51
  "puppeteer-extra-plugin-block-resources": "^2.4.3",
52
  "puppeteer-extra-plugin-page-proxy": "^2.0.0",
backend/functions/src/index.ts CHANGED
@@ -1,11 +1,11 @@
1
  import 'reflect-metadata';
 
2
  import { initializeApp } from 'firebase-admin/app';
3
  initializeApp();
4
 
5
 
6
  import { loadModulesDynamically, registry } from './shared';
7
  import path from 'path';
8
- import { ApplicationError } from 'civkit';
9
  loadModulesDynamically(path.resolve(__dirname, 'cloud-functions'));
10
 
11
  Object.assign(exports, registry.exportAll());
@@ -16,24 +16,14 @@ Object.assign(exports, registry.exportGrouped({
16
  registry.title = 'reader';
17
  registry.version = '0.1.0';
18
 
19
- process.on('unhandledRejection', (err) => {
20
- // Walk around Firebase runtime bug.
21
- if (err instanceof ApplicationError) {
22
- // Application error shall not crash the process;
23
- return;
24
- }
25
-
26
- // Looks like Firebase runtime does not handle error properly.
27
- // Make sure to quit the process.
28
- process.nextTick(() => process.exit(1));
29
-
30
- throw err;
31
- });
32
 
33
  process.on('uncaughtException', (err) => {
 
 
34
  // Looks like Firebase runtime does not handle error properly.
35
  // Make sure to quit the process.
36
  process.nextTick(() => process.exit(1));
37
-
38
  throw err;
39
  });
 
1
  import 'reflect-metadata';
2
+ import './shared/lib/doom-domain';
3
  import { initializeApp } from 'firebase-admin/app';
4
  initializeApp();
5
 
6
 
7
  import { loadModulesDynamically, registry } from './shared';
8
  import path from 'path';
 
9
  loadModulesDynamically(path.resolve(__dirname, 'cloud-functions'));
10
 
11
  Object.assign(exports, registry.exportAll());
 
16
  registry.title = 'reader';
17
  registry.version = '0.1.0';
18
 
19
+ process.on('unhandledRejection', (_err) => `Somehow is false alarm in firebase`);
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  process.on('uncaughtException', (err) => {
22
+ console.log('Uncaught exception', err);
23
+
24
  // Looks like Firebase runtime does not handle error properly.
25
  // Make sure to quit the process.
26
  process.nextTick(() => process.exit(1));
27
+ console.error('Uncaught exception, process quit.');
28
  throw err;
29
  });
backend/functions/src/services/puppeteer.ts CHANGED
@@ -10,6 +10,7 @@ import puppeteer from 'puppeteer-extra';
10
 
11
  import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
12
  import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
 
13
 
14
 
15
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
@@ -85,7 +86,6 @@ export class PuppeteerControl extends AsyncService {
85
  await Promise.race([
86
  (async () => {
87
  const ctx = page.browserContext();
88
- await page.removeExposedFunction('reportSnapshot');
89
  await page.close();
90
  await ctx.close();
91
  })(), delay(5000)
@@ -110,6 +110,7 @@ export class PuppeteerControl extends AsyncService {
110
 
111
  constructor(protected globalLogger: Logger) {
112
  super(...arguments);
 
113
  }
114
 
115
  override async init() {
@@ -141,12 +142,13 @@ export class PuppeteerControl extends AsyncService {
141
  this.browser.once('disconnected', () => {
142
  this.logger.warn(`Browser disconnected`);
143
  this.emit('crippled');
 
144
  });
145
  this.logger.info(`Browser launched: ${this.browser.process()?.pid}`);
146
 
147
  this.emit('ready');
148
 
149
- this.__healthCheckInterval = setInterval(() => this.healthCheck(), 30_000);
150
  }
151
 
152
  @maxConcurrency(1)
@@ -235,6 +237,8 @@ function giveSnapshot() {
235
  `));
236
  await Promise.all(preparations);
237
 
 
 
238
  await page.evaluateOnNewDocument(`
239
  let aftershot = undefined;
240
  const handlePageLoad = () => {
@@ -262,8 +266,6 @@ document.addEventListener('readystatechange', handlePageLoad);
262
  document.addEventListener('load', handlePageLoad);
263
  `);
264
 
265
- // TODO: further setup the page;
266
-
267
  return page;
268
  }
269
 
@@ -272,7 +274,6 @@ document.addEventListener('load', handlePageLoad);
272
  const url = parsedUrl.toString();
273
 
274
  this.logger.info(`Scraping ${url}`, { url });
275
-
276
  let snapshot: PageSnapshot | undefined;
277
  let screenshot: Buffer | undefined;
278
 
@@ -285,6 +286,11 @@ document.addEventListener('load', handlePageLoad);
285
  }
286
 
287
  let nextSnapshotDeferred = Defer();
 
 
 
 
 
288
  let finalized = false;
289
  const hdl = (s: any) => {
290
  if (snapshot === s) {
@@ -293,6 +299,10 @@ document.addEventListener('load', handlePageLoad);
293
  snapshot = s;
294
  nextSnapshotDeferred.resolve(s);
295
  nextSnapshotDeferred = Defer();
 
 
 
 
296
  };
297
  page.on('snapshot', hdl);
298
 
 
10
 
11
  import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
12
  import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
13
+ import { ServiceCrashedError } from '../shared/lib/errors';
14
 
15
 
16
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
 
86
  await Promise.race([
87
  (async () => {
88
  const ctx = page.browserContext();
 
89
  await page.close();
90
  await ctx.close();
91
  })(), delay(5000)
 
110
 
111
  constructor(protected globalLogger: Logger) {
112
  super(...arguments);
113
+ this.setMaxListeners(2 * this.pagePool.max + 1);
114
  }
115
 
116
  override async init() {
 
142
  this.browser.once('disconnected', () => {
143
  this.logger.warn(`Browser disconnected`);
144
  this.emit('crippled');
145
+ process.nextTick(()=> this.serviceReady());
146
  });
147
  this.logger.info(`Browser launched: ${this.browser.process()?.pid}`);
148
 
149
  this.emit('ready');
150
 
151
+ // this.__healthCheckInterval = setInterval(() => this.healthCheck(), 30_000);
152
  }
153
 
154
  @maxConcurrency(1)
 
237
  `));
238
  await Promise.all(preparations);
239
 
240
+ await page.goto('about:blank', { waitUntil: 'domcontentloaded' });
241
+
242
  await page.evaluateOnNewDocument(`
243
  let aftershot = undefined;
244
  const handlePageLoad = () => {
 
266
  document.addEventListener('load', handlePageLoad);
267
  `);
268
 
 
 
269
  return page;
270
  }
271
 
 
274
  const url = parsedUrl.toString();
275
 
276
  this.logger.info(`Scraping ${url}`, { url });
 
277
  let snapshot: PageSnapshot | undefined;
278
  let screenshot: Buffer | undefined;
279
 
 
286
  }
287
 
288
  let nextSnapshotDeferred = Defer();
289
+ const crippleListener = () => nextSnapshotDeferred.reject(new ServiceCrashedError({ message: `Browser crashed, try again` }));
290
+ this.once('crippled', crippleListener);
291
+ nextSnapshotDeferred.promise.finally(() => {
292
+ this.off('crippled', crippleListener);
293
+ });
294
  let finalized = false;
295
  const hdl = (s: any) => {
296
  if (snapshot === s) {
 
299
  snapshot = s;
300
  nextSnapshotDeferred.resolve(s);
301
  nextSnapshotDeferred = Defer();
302
+ this.once('crippled', crippleListener);
303
+ nextSnapshotDeferred.promise.finally(() => {
304
+ this.off('crippled', crippleListener);
305
+ });
306
  };
307
  page.on('snapshot', hdl);
308
 
thinapps-shared CHANGED
@@ -1 +1 @@
1
- Subproject commit e2a1d586063f8e8d663c013fa2febe9f621f9f8e
 
1
+ Subproject commit a6a3ad42efc34da243afa25d602b405b92f13379