Spaces:
Build error
Build error
fix: side load context bridging
Browse files
- package-lock.json +4 -4
- package.json +1 -1
- src/services/puppeteer.ts +70 -56
- thinapps-shared +1 -1
package-lock.json
CHANGED
|
@@ -17,7 +17,7 @@
|
|
| 17 |
"axios": "^1.3.3",
|
| 18 |
"bcrypt": "^5.1.0",
|
| 19 |
"busboy": "^1.6.0",
|
| 20 |
-
"civkit": "^0.8.4-
|
| 21 |
"core-js": "^3.37.1",
|
| 22 |
"cors": "^2.8.5",
|
| 23 |
"dayjs": "^1.11.9",
|
|
@@ -4095,9 +4095,9 @@
|
|
| 4095 |
}
|
| 4096 |
},
|
| 4097 |
"node_modules/civkit": {
|
| 4098 |
-
"version": "0.8.4-
|
| 4099 |
-
"resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.4-
|
| 4100 |
-
"integrity": "sha512-
|
| 4101 |
"license": "AGPL",
|
| 4102 |
"dependencies": {
|
| 4103 |
"lodash": "^4.17.21",
|
|
|
|
| 17 |
"axios": "^1.3.3",
|
| 18 |
"bcrypt": "^5.1.0",
|
| 19 |
"busboy": "^1.6.0",
|
| 20 |
+
"civkit": "^0.8.4-6ed9027",
|
| 21 |
"core-js": "^3.37.1",
|
| 22 |
"cors": "^2.8.5",
|
| 23 |
"dayjs": "^1.11.9",
|
|
|
|
| 4095 |
}
|
| 4096 |
},
|
| 4097 |
"node_modules/civkit": {
|
| 4098 |
+
"version": "0.8.4-6ed9027",
|
| 4099 |
+
"resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.4-6ed9027.tgz",
|
| 4100 |
+
"integrity": "sha512-VU8Ykik1L16Li9/QZfw5wYsmu3jJYH/zIHbM6Vd2ajRI7Mh4fSO3cXadUntM190BersLW9Fts+qunDPabhIWZA==",
|
| 4101 |
"license": "AGPL",
|
| 4102 |
"dependencies": {
|
| 4103 |
"lodash": "^4.17.21",
|
package.json
CHANGED
|
@@ -25,7 +25,7 @@
|
|
| 25 |
"axios": "^1.3.3",
|
| 26 |
"bcrypt": "^5.1.0",
|
| 27 |
"busboy": "^1.6.0",
|
| 28 |
-
"civkit": "^0.8.4-
|
| 29 |
"core-js": "^3.37.1",
|
| 30 |
"cors": "^2.8.5",
|
| 31 |
"dayjs": "^1.11.9",
|
|
|
|
| 25 |
"axios": "^1.3.3",
|
| 26 |
"bcrypt": "^5.1.0",
|
| 27 |
"busboy": "^1.6.0",
|
| 28 |
+
"civkit": "^0.8.4-6ed9027",
|
| 29 |
"core-js": "^3.37.1",
|
| 30 |
"cors": "^2.8.5",
|
| 31 |
"dayjs": "^1.11.9",
|
src/services/puppeteer.ts
CHANGED
|
@@ -17,6 +17,7 @@ import { isIP } from 'net';
|
|
| 17 |
import { CurlControl } from './curl';
|
| 18 |
import { readFile } from 'fs/promises';
|
| 19 |
import { BlackHoleDetector } from './blackhole-detector';
|
|
|
|
| 20 |
const tldExtract = require('tld-extract');
|
| 21 |
|
| 22 |
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
|
@@ -468,8 +469,11 @@ export class PuppeteerControl extends AsyncService {
|
|
| 468 |
|
| 469 |
circuitBreakerHosts: Set<string> = new Set();
|
| 470 |
|
|
|
|
|
|
|
| 471 |
constructor(
|
| 472 |
protected globalLogger: Logger,
|
|
|
|
| 473 |
protected curlControl: CurlControl,
|
| 474 |
protected blackHoleDetector: BlackHoleDetector,
|
| 475 |
) {
|
|
@@ -774,6 +778,7 @@ export class PuppeteerControl extends AsyncService {
|
|
| 774 |
const pdfUrls: string[] = [];
|
| 775 |
let navigationResponse: HTTPResponse | undefined;
|
| 776 |
const page = await this.getNextPage();
|
|
|
|
| 777 |
this.pagePhase.set(page, 'active');
|
| 778 |
page.on('response', (resp) => {
|
| 779 |
this.blackHoleDetector.itWorked();
|
|
@@ -805,6 +810,19 @@ export class PuppeteerControl extends AsyncService {
|
|
| 805 |
if (!options.proxyResources) {
|
| 806 |
const isDocRequest = ['document', 'xhr', 'fetch', 'websocket', 'prefetch', 'eventsource', 'ping'].includes(typ);
|
| 807 |
if (!isDocRequest) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 808 |
const overrides = req.continueRequestOverrides();
|
| 809 |
|
| 810 |
return req.continue(overrides, 0);
|
|
@@ -830,54 +848,69 @@ export class PuppeteerControl extends AsyncService {
|
|
| 830 |
}
|
| 831 |
|
| 832 |
const proxy = options.proxyUrl || sideload?.proxyOrigin?.[reqUrlParsed.origin];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
|
| 834 |
-
|
| 835 |
-
|
| 836 |
-
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
..
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
if (req.isInterceptResolutionHandled()) {
|
| 847 |
-
return;
|
| 848 |
-
};
|
| 849 |
-
|
| 850 |
-
if (curled.chain.length === 1) {
|
| 851 |
-
if (!curled.file) {
|
| 852 |
return req.respond({
|
| 853 |
status: curled.status,
|
| 854 |
headers: _.omit(curled.headers, 'result'),
|
| 855 |
contentType: curled.contentType,
|
|
|
|
| 856 |
}, 999);
|
| 857 |
}
|
| 858 |
-
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
return req.respond({
|
| 863 |
-
status:
|
| 864 |
-
headers: _.omit(
|
| 865 |
-
contentType: curled.contentType,
|
| 866 |
-
body: Uint8Array.from(body),
|
| 867 |
}, 999);
|
|
|
|
|
|
|
| 868 |
}
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
headers:
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
|
|
|
|
|
|
| 879 |
|
| 880 |
-
|
|
|
|
| 881 |
}
|
| 882 |
|
| 883 |
if (req.isInterceptResolutionHandled()) {
|
|
@@ -895,25 +928,6 @@ export class PuppeteerControl extends AsyncService {
|
|
| 895 |
|
| 896 |
return req.continue(continueArgs[0], continueArgs[1]);
|
| 897 |
});
|
| 898 |
-
if (options.extraHeaders) {
|
| 899 |
-
page.on('request', async (req) => {
|
| 900 |
-
if (req.isInterceptResolutionHandled()) {
|
| 901 |
-
return;
|
| 902 |
-
};
|
| 903 |
-
|
| 904 |
-
const overrides = req.continueRequestOverrides();
|
| 905 |
-
const continueArgs = [{
|
| 906 |
-
...overrides,
|
| 907 |
-
headers: {
|
| 908 |
-
...req.headers(),
|
| 909 |
-
...overrides?.headers,
|
| 910 |
-
...options.extraHeaders,
|
| 911 |
-
}
|
| 912 |
-
}, 1] as const;
|
| 913 |
-
|
| 914 |
-
return req.continue(continueArgs[0], continueArgs[1]);
|
| 915 |
-
});
|
| 916 |
-
}
|
| 917 |
let pageScriptEvaluations: Promise<unknown>[] = [];
|
| 918 |
let frameScriptEvaluations: Promise<unknown>[] = [];
|
| 919 |
if (options.injectPageScripts?.length) {
|
|
|
|
| 17 |
import { CurlControl } from './curl';
|
| 18 |
import { readFile } from 'fs/promises';
|
| 19 |
import { BlackHoleDetector } from './blackhole-detector';
|
| 20 |
+
import { AsyncLocalContext } from './async-context';
|
| 21 |
const tldExtract = require('tld-extract');
|
| 22 |
|
| 23 |
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
|
|
|
| 469 |
|
| 470 |
circuitBreakerHosts: Set<string> = new Set();
|
| 471 |
|
| 472 |
+
lifeCycleTrack = new WeakMap();
|
| 473 |
+
|
| 474 |
constructor(
|
| 475 |
protected globalLogger: Logger,
|
| 476 |
+
protected asyncLocalContext: AsyncLocalContext,
|
| 477 |
protected curlControl: CurlControl,
|
| 478 |
protected blackHoleDetector: BlackHoleDetector,
|
| 479 |
) {
|
|
|
|
| 778 |
const pdfUrls: string[] = [];
|
| 779 |
let navigationResponse: HTTPResponse | undefined;
|
| 780 |
const page = await this.getNextPage();
|
| 781 |
+
this.lifeCycleTrack.set(page, this.asyncLocalContext.ctx);
|
| 782 |
this.pagePhase.set(page, 'active');
|
| 783 |
page.on('response', (resp) => {
|
| 784 |
this.blackHoleDetector.itWorked();
|
|
|
|
| 810 |
if (!options.proxyResources) {
|
| 811 |
const isDocRequest = ['document', 'xhr', 'fetch', 'websocket', 'prefetch', 'eventsource', 'ping'].includes(typ);
|
| 812 |
if (!isDocRequest) {
|
| 813 |
+
if (options.extraHeaders) {
|
| 814 |
+
const overrides = req.continueRequestOverrides();
|
| 815 |
+
const continueArgs = [{
|
| 816 |
+
...overrides,
|
| 817 |
+
headers: {
|
| 818 |
+
...req.headers(),
|
| 819 |
+
...overrides?.headers,
|
| 820 |
+
...options.extraHeaders,
|
| 821 |
+
}
|
| 822 |
+
}, 1] as const;
|
| 823 |
+
|
| 824 |
+
return req.continue(continueArgs[0], continueArgs[1]);
|
| 825 |
+
}
|
| 826 |
const overrides = req.continueRequestOverrides();
|
| 827 |
|
| 828 |
return req.continue(overrides, 0);
|
|
|
|
| 848 |
}
|
| 849 |
|
| 850 |
const proxy = options.proxyUrl || sideload?.proxyOrigin?.[reqUrlParsed.origin];
|
| 851 |
+
const ctx = this.lifeCycleTrack.get(page);
|
| 852 |
+
if (proxy && ctx) {
|
| 853 |
+
return this.asyncLocalContext.bridge(ctx, async () => {
|
| 854 |
+
try {
|
| 855 |
+
const curled = await this.curlControl.sideLoad(reqUrlParsed, {
|
| 856 |
+
...options,
|
| 857 |
+
method: req.method(),
|
| 858 |
+
body: req.postData(),
|
| 859 |
+
extraHeaders: {
|
| 860 |
+
...req.headers(),
|
| 861 |
+
...options.extraHeaders,
|
| 862 |
+
},
|
| 863 |
+
proxyUrl: proxy
|
| 864 |
+
});
|
| 865 |
+
if (req.isInterceptResolutionHandled()) {
|
| 866 |
+
return;
|
| 867 |
+
};
|
| 868 |
|
| 869 |
+
if (curled.chain.length === 1) {
|
| 870 |
+
if (!curled.file) {
|
| 871 |
+
return req.respond({
|
| 872 |
+
status: curled.status,
|
| 873 |
+
headers: _.omit(curled.headers, 'result'),
|
| 874 |
+
contentType: curled.contentType,
|
| 875 |
+
}, 999);
|
| 876 |
+
}
|
| 877 |
+
const body = await readFile(await curled.file.filePath);
|
| 878 |
+
if (req.isInterceptResolutionHandled()) {
|
| 879 |
+
return;
|
| 880 |
+
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 881 |
return req.respond({
|
| 882 |
status: curled.status,
|
| 883 |
headers: _.omit(curled.headers, 'result'),
|
| 884 |
contentType: curled.contentType,
|
| 885 |
+
body: Uint8Array.from(body),
|
| 886 |
}, 999);
|
| 887 |
}
|
| 888 |
+
options.sideLoad ??= curled.sideLoadOpts;
|
| 889 |
+
_.merge(options.sideLoad, curled.sideLoadOpts);
|
| 890 |
+
const firstReq = curled.chain[0];
|
| 891 |
+
|
| 892 |
return req.respond({
|
| 893 |
+
status: firstReq.result!.code,
|
| 894 |
+
headers: _.omit(firstReq, 'result'),
|
|
|
|
|
|
|
| 895 |
}, 999);
|
| 896 |
+
} catch (err: any) {
|
| 897 |
+
this.logger.warn(`Failed to sideload ${reqUrlParsed.origin}`, { href: reqUrlParsed.href, err: marshalErrorLike(err) });
|
| 898 |
}
|
| 899 |
+
if (req.isInterceptResolutionHandled()) {
|
| 900 |
+
return;
|
| 901 |
+
};
|
| 902 |
+
const overrides = req.continueRequestOverrides();
|
| 903 |
+
const continueArgs = [{
|
| 904 |
+
...overrides,
|
| 905 |
+
headers: {
|
| 906 |
+
...req.headers(),
|
| 907 |
+
...overrides?.headers,
|
| 908 |
+
...options.extraHeaders,
|
| 909 |
+
}
|
| 910 |
+
}, 1] as const;
|
| 911 |
|
| 912 |
+
return req.continue(continueArgs[0], continueArgs[1]);
|
| 913 |
+
});
|
| 914 |
}
|
| 915 |
|
| 916 |
if (req.isInterceptResolutionHandled()) {
|
|
|
|
| 928 |
|
| 929 |
return req.continue(continueArgs[0], continueArgs[1]);
|
| 930 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 931 |
let pageScriptEvaluations: Promise<unknown>[] = [];
|
| 932 |
let frameScriptEvaluations: Promise<unknown>[] = [];
|
| 933 |
if (options.injectPageScripts?.length) {
|
thinapps-shared
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
Subproject commit
|
|
|
|
| 1 |
+
Subproject commit 20417f5bb7f8c773a835304f0624a180b558ff65
|