nomagick committed on
Commit
8597daa
·
unverified ·
1 Parent(s): e92ff33

fix: side load context bridging

Browse files
package-lock.json CHANGED
@@ -17,7 +17,7 @@
17
  "axios": "^1.3.3",
18
  "bcrypt": "^5.1.0",
19
  "busboy": "^1.6.0",
20
- "civkit": "^0.8.4-c44153f",
21
  "core-js": "^3.37.1",
22
  "cors": "^2.8.5",
23
  "dayjs": "^1.11.9",
@@ -4095,9 +4095,9 @@
4095
  }
4096
  },
4097
  "node_modules/civkit": {
4098
- "version": "0.8.4-c44153f",
4099
- "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.4-c44153f.tgz",
4100
- "integrity": "sha512-VBElW71aAqqP0G+8F460hZfnDrn4kMCxTCn+FaFqGG2B0TmNkfwjVZL9VuDRNtSzNBbEO9rRKLJG1iw4y8sZxQ==",
4101
  "license": "AGPL",
4102
  "dependencies": {
4103
  "lodash": "^4.17.21",
 
17
  "axios": "^1.3.3",
18
  "bcrypt": "^5.1.0",
19
  "busboy": "^1.6.0",
20
+ "civkit": "^0.8.4-6ed9027",
21
  "core-js": "^3.37.1",
22
  "cors": "^2.8.5",
23
  "dayjs": "^1.11.9",
 
4095
  }
4096
  },
4097
  "node_modules/civkit": {
4098
+ "version": "0.8.4-6ed9027",
4099
+ "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.4-6ed9027.tgz",
4100
+ "integrity": "sha512-VU8Ykik1L16Li9/QZfw5wYsmu3jJYH/zIHbM6Vd2ajRI7Mh4fSO3cXadUntM190BersLW9Fts+qunDPabhIWZA==",
4101
  "license": "AGPL",
4102
  "dependencies": {
4103
  "lodash": "^4.17.21",
package.json CHANGED
@@ -25,7 +25,7 @@
25
  "axios": "^1.3.3",
26
  "bcrypt": "^5.1.0",
27
  "busboy": "^1.6.0",
28
- "civkit": "^0.8.4-c44153f",
29
  "core-js": "^3.37.1",
30
  "cors": "^2.8.5",
31
  "dayjs": "^1.11.9",
 
25
  "axios": "^1.3.3",
26
  "bcrypt": "^5.1.0",
27
  "busboy": "^1.6.0",
28
+ "civkit": "^0.8.4-6ed9027",
29
  "core-js": "^3.37.1",
30
  "cors": "^2.8.5",
31
  "dayjs": "^1.11.9",
src/services/puppeteer.ts CHANGED
@@ -17,6 +17,7 @@ import { isIP } from 'net';
17
  import { CurlControl } from './curl';
18
  import { readFile } from 'fs/promises';
19
  import { BlackHoleDetector } from './blackhole-detector';
 
20
  const tldExtract = require('tld-extract');
21
 
22
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
@@ -468,8 +469,11 @@ export class PuppeteerControl extends AsyncService {
468
 
469
  circuitBreakerHosts: Set<string> = new Set();
470
 
 
 
471
  constructor(
472
  protected globalLogger: Logger,
 
473
  protected curlControl: CurlControl,
474
  protected blackHoleDetector: BlackHoleDetector,
475
  ) {
@@ -774,6 +778,7 @@ export class PuppeteerControl extends AsyncService {
774
  const pdfUrls: string[] = [];
775
  let navigationResponse: HTTPResponse | undefined;
776
  const page = await this.getNextPage();
 
777
  this.pagePhase.set(page, 'active');
778
  page.on('response', (resp) => {
779
  this.blackHoleDetector.itWorked();
@@ -805,6 +810,19 @@ export class PuppeteerControl extends AsyncService {
805
  if (!options.proxyResources) {
806
  const isDocRequest = ['document', 'xhr', 'fetch', 'websocket', 'prefetch', 'eventsource', 'ping'].includes(typ);
807
  if (!isDocRequest) {
 
 
 
 
 
 
 
 
 
 
 
 
 
808
  const overrides = req.continueRequestOverrides();
809
 
810
  return req.continue(overrides, 0);
@@ -830,54 +848,69 @@ export class PuppeteerControl extends AsyncService {
830
  }
831
 
832
  const proxy = options.proxyUrl || sideload?.proxyOrigin?.[reqUrlParsed.origin];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
833
 
834
- if (proxy) {
835
- try {
836
- const curled = await this.curlControl.sideLoad(reqUrlParsed, {
837
- ...options,
838
- method: req.method(),
839
- body: req.postData(),
840
- extraHeaders: {
841
- ...req.headers(),
842
- ...options.extraHeaders,
843
- },
844
- proxyUrl: proxy
845
- });
846
- if (req.isInterceptResolutionHandled()) {
847
- return;
848
- };
849
-
850
- if (curled.chain.length === 1) {
851
- if (!curled.file) {
852
  return req.respond({
853
  status: curled.status,
854
  headers: _.omit(curled.headers, 'result'),
855
  contentType: curled.contentType,
 
856
  }, 999);
857
  }
858
- const body = await readFile(await curled.file.filePath);
859
- if (req.isInterceptResolutionHandled()) {
860
- return;
861
- };
862
  return req.respond({
863
- status: curled.status,
864
- headers: _.omit(curled.headers, 'result'),
865
- contentType: curled.contentType,
866
- body: Uint8Array.from(body),
867
  }, 999);
 
 
868
  }
869
- options.sideLoad ??= curled.sideLoadOpts;
870
- _.merge(options.sideLoad, curled.sideLoadOpts);
871
- const firstReq = curled.chain[0];
872
-
873
- return req.respond({
874
- status: firstReq.result!.code,
875
- headers: _.omit(firstReq, 'result'),
876
- }, 999);
877
- } catch (err: any) {
878
- this.logger.warn(`Failed to sideload ${reqUrlParsed.origin}`, { href: reqUrlParsed.href, err: marshalErrorLike(err) });
 
 
879
 
880
- }
 
881
  }
882
 
883
  if (req.isInterceptResolutionHandled()) {
@@ -895,25 +928,6 @@ export class PuppeteerControl extends AsyncService {
895
 
896
  return req.continue(continueArgs[0], continueArgs[1]);
897
  });
898
- if (options.extraHeaders) {
899
- page.on('request', async (req) => {
900
- if (req.isInterceptResolutionHandled()) {
901
- return;
902
- };
903
-
904
- const overrides = req.continueRequestOverrides();
905
- const continueArgs = [{
906
- ...overrides,
907
- headers: {
908
- ...req.headers(),
909
- ...overrides?.headers,
910
- ...options.extraHeaders,
911
- }
912
- }, 1] as const;
913
-
914
- return req.continue(continueArgs[0], continueArgs[1]);
915
- });
916
- }
917
  let pageScriptEvaluations: Promise<unknown>[] = [];
918
  let frameScriptEvaluations: Promise<unknown>[] = [];
919
  if (options.injectPageScripts?.length) {
 
17
  import { CurlControl } from './curl';
18
  import { readFile } from 'fs/promises';
19
  import { BlackHoleDetector } from './blackhole-detector';
20
+ import { AsyncLocalContext } from './async-context';
21
  const tldExtract = require('tld-extract');
22
 
23
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
 
469
 
470
  circuitBreakerHosts: Set<string> = new Set();
471
 
472
+ lifeCycleTrack = new WeakMap();
473
+
474
  constructor(
475
  protected globalLogger: Logger,
476
+ protected asyncLocalContext: AsyncLocalContext,
477
  protected curlControl: CurlControl,
478
  protected blackHoleDetector: BlackHoleDetector,
479
  ) {
 
778
  const pdfUrls: string[] = [];
779
  let navigationResponse: HTTPResponse | undefined;
780
  const page = await this.getNextPage();
781
+ this.lifeCycleTrack.set(page, this.asyncLocalContext.ctx);
782
  this.pagePhase.set(page, 'active');
783
  page.on('response', (resp) => {
784
  this.blackHoleDetector.itWorked();
 
810
  if (!options.proxyResources) {
811
  const isDocRequest = ['document', 'xhr', 'fetch', 'websocket', 'prefetch', 'eventsource', 'ping'].includes(typ);
812
  if (!isDocRequest) {
813
+ if (options.extraHeaders) {
814
+ const overrides = req.continueRequestOverrides();
815
+ const continueArgs = [{
816
+ ...overrides,
817
+ headers: {
818
+ ...req.headers(),
819
+ ...overrides?.headers,
820
+ ...options.extraHeaders,
821
+ }
822
+ }, 1] as const;
823
+
824
+ return req.continue(continueArgs[0], continueArgs[1]);
825
+ }
826
  const overrides = req.continueRequestOverrides();
827
 
828
  return req.continue(overrides, 0);
 
848
  }
849
 
850
  const proxy = options.proxyUrl || sideload?.proxyOrigin?.[reqUrlParsed.origin];
851
+ const ctx = this.lifeCycleTrack.get(page);
852
+ if (proxy && ctx) {
853
+ return this.asyncLocalContext.bridge(ctx, async () => {
854
+ try {
855
+ const curled = await this.curlControl.sideLoad(reqUrlParsed, {
856
+ ...options,
857
+ method: req.method(),
858
+ body: req.postData(),
859
+ extraHeaders: {
860
+ ...req.headers(),
861
+ ...options.extraHeaders,
862
+ },
863
+ proxyUrl: proxy
864
+ });
865
+ if (req.isInterceptResolutionHandled()) {
866
+ return;
867
+ };
868
 
869
+ if (curled.chain.length === 1) {
870
+ if (!curled.file) {
871
+ return req.respond({
872
+ status: curled.status,
873
+ headers: _.omit(curled.headers, 'result'),
874
+ contentType: curled.contentType,
875
+ }, 999);
876
+ }
877
+ const body = await readFile(await curled.file.filePath);
878
+ if (req.isInterceptResolutionHandled()) {
879
+ return;
880
+ };
 
 
 
 
 
 
881
  return req.respond({
882
  status: curled.status,
883
  headers: _.omit(curled.headers, 'result'),
884
  contentType: curled.contentType,
885
+ body: Uint8Array.from(body),
886
  }, 999);
887
  }
888
+ options.sideLoad ??= curled.sideLoadOpts;
889
+ _.merge(options.sideLoad, curled.sideLoadOpts);
890
+ const firstReq = curled.chain[0];
891
+
892
  return req.respond({
893
+ status: firstReq.result!.code,
894
+ headers: _.omit(firstReq, 'result'),
 
 
895
  }, 999);
896
+ } catch (err: any) {
897
+ this.logger.warn(`Failed to sideload ${reqUrlParsed.origin}`, { href: reqUrlParsed.href, err: marshalErrorLike(err) });
898
  }
899
+ if (req.isInterceptResolutionHandled()) {
900
+ return;
901
+ };
902
+ const overrides = req.continueRequestOverrides();
903
+ const continueArgs = [{
904
+ ...overrides,
905
+ headers: {
906
+ ...req.headers(),
907
+ ...overrides?.headers,
908
+ ...options.extraHeaders,
909
+ }
910
+ }, 1] as const;
911
 
912
+ return req.continue(continueArgs[0], continueArgs[1]);
913
+ });
914
  }
915
 
916
  if (req.isInterceptResolutionHandled()) {
 
928
 
929
  return req.continue(continueArgs[0], continueArgs[1]);
930
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
931
  let pageScriptEvaluations: Promise<unknown>[] = [];
932
  let frameScriptEvaluations: Promise<unknown>[] = [];
933
  if (options.injectPageScripts?.length) {
thinapps-shared CHANGED
@@ -1 +1 @@
1
- Subproject commit 16521fd4a55f983c050d4cdd0c24a8ac400901d1
 
1
+ Subproject commit 20417f5bb7f8c773a835304f0624a180b558ff65