nomagick committed on
Commit
67d4a9f
·
unverified ·
1 Parent(s): 53bc91c

fix: expect cookie encoding issue

Browse files
backend/functions/src/dto/scrapping-options.ts CHANGED
@@ -1,7 +1,6 @@
1
  import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
2
  import type { Request, Response } from 'express';
3
- import type { CookieParam } from 'puppeteer';
4
- import { parseString as parseSetCookieString } from 'set-cookie-parser';
5
 
6
  export enum CONTENT_FORMAT {
7
  CONTENT = 'content',
@@ -218,7 +217,7 @@ export class CrawlerOptions extends AutoCastable {
218
  @Prop({
219
  arrayOf: String,
220
  })
221
- setCookies?: CookieParam[];
222
 
223
  @Prop()
224
  proxyUrl?: string;
@@ -331,17 +330,17 @@ export class CrawlerOptions extends AutoCastable {
331
  instance.timeout ??= null;
332
  }
333
 
334
- const cookies: CookieParam[] = [];
335
  const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
336
  if (Array.isArray(setCookieHeaders)) {
337
  for (const setCookie of setCookieHeaders) {
338
  cookies.push({
339
- ...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam,
340
  });
341
  }
342
  } else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
343
  cookies.push({
344
- ...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
345
  });
346
  }
347
  instance.setCookies = cookies;
 
1
  import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
2
  import type { Request, Response } from 'express';
3
+ import { Cookie, parseString as parseSetCookieString } from 'set-cookie-parser';
 
4
 
5
  export enum CONTENT_FORMAT {
6
  CONTENT = 'content',
 
217
  @Prop({
218
  arrayOf: String,
219
  })
220
+ setCookies?: Cookie[];
221
 
222
  @Prop()
223
  proxyUrl?: string;
 
330
  instance.timeout ??= null;
331
  }
332
 
333
+ const cookies: Cookie[] = [];
334
  const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
335
  if (Array.isArray(setCookieHeaders)) {
336
  for (const setCookie of setCookieHeaders) {
337
  cookies.push({
338
+ ...parseSetCookieString(setCookie, { decodeValues: true }),
339
  });
340
  }
341
  } else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
342
  cookies.push({
343
+ ...parseSetCookieString(setCookieHeaders, { decodeValues: true }),
344
  });
345
  }
346
  instance.setCookies = cookies;
backend/functions/src/services/puppeteer.ts CHANGED
@@ -1,10 +1,11 @@
1
  import os from 'os';
2
  import fs from 'fs';
3
  import { container, singleton } from 'tsyringe';
4
- import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick } from 'civkit';
5
  import { Logger } from '../shared/services/logger';
6
 
7
  import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page } from 'puppeteer';
 
8
  import puppeteer from 'puppeteer-extra';
9
 
10
  import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
@@ -67,7 +68,7 @@ export interface ExtendedSnapshot extends PageSnapshot {
67
 
68
  export interface ScrappingOptions {
69
  proxyUrl?: string;
70
- cookies?: CookieParam[];
71
  favorScreenshot?: boolean;
72
  waitForSelector?: string | string[];
73
  minIntervalMs?: number;
@@ -817,13 +818,33 @@ export class PuppeteerControl extends AsyncService {
817
  }
818
  if (options?.cookies) {
819
  const mapped = options.cookies.map((x) => {
820
- if (x.domain || x.url) {
821
- return x;
 
 
 
 
 
 
 
 
 
 
 
 
822
  }
823
 
824
- return { ...x, url: parsedUrl.toString() };
825
  });
826
- await page.setCookie(...mapped);
 
 
 
 
 
 
 
 
827
  }
828
  if (options?.overrideUserAgent) {
829
  await page.setUserAgent(options.overrideUserAgent);
 
1
  import os from 'os';
2
  import fs from 'fs';
3
  import { container, singleton } from 'tsyringe';
4
+ import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick, ParamValidationError } from 'civkit';
5
  import { Logger } from '../shared/services/logger';
6
 
7
  import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page } from 'puppeteer';
8
+ import type { Cookie } from 'set-cookie-parser';
9
  import puppeteer from 'puppeteer-extra';
10
 
11
  import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
 
68
 
69
  export interface ScrappingOptions {
70
  proxyUrl?: string;
71
+ cookies?: Cookie[];
72
  favorScreenshot?: boolean;
73
  waitForSelector?: string | string[];
74
  minIntervalMs?: number;
 
818
  }
819
  if (options?.cookies) {
820
  const mapped = options.cookies.map((x) => {
821
+ const draft: CookieParam = {
822
+ name: x.name,
823
+ value: encodeURIComponent(x.value),
824
+ secure: x.secure,
825
+ domain: x.domain,
826
+ path: x.path,
827
+ expires: x.expires ? Math.floor(x.expires.valueOf() / 1000) : undefined,
828
+ sameSite: x.sameSite as any,
829
+ };
830
+ if (!draft.expires && x.maxAge) {
831
+ draft.expires = Math.floor(Date.now() / 1000) + x.maxAge;
832
+ }
833
+ if (!draft.domain) {
834
+ draft.url = parsedUrl.toString();
835
  }
836
 
837
+ return draft;
838
  });
839
+ try {
840
+ await page.setCookie(...mapped);
841
+ } catch (err: any) {
842
+ this.logger.warn(`Page ${sn}: Failed to set cookies`, { err: marshalErrorLike(err) });
843
+ throw new ParamValidationError({
844
+ path: 'cookies',
845
+ message: `Failed to set cookies: ${err?.message}`
846
+ });
847
+ }
848
  }
849
  if (options?.overrideUserAgent) {
850
  await page.setUserAgent(options.overrideUserAgent);