Spaces:
Build error
Build error
fix: expect cookie encoding issue
Browse files
backend/functions/src/dto/scrapping-options.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
|
| 2 |
import type { Request, Response } from 'express';
|
| 3 |
-
import
|
| 4 |
-
import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
| 5 |
|
| 6 |
export enum CONTENT_FORMAT {
|
| 7 |
CONTENT = 'content',
|
|
@@ -218,7 +217,7 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 218 |
@Prop({
|
| 219 |
arrayOf: String,
|
| 220 |
})
|
| 221 |
-
setCookies?:
|
| 222 |
|
| 223 |
@Prop()
|
| 224 |
proxyUrl?: string;
|
|
@@ -331,17 +330,17 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 331 |
instance.timeout ??= null;
|
| 332 |
}
|
| 333 |
|
| 334 |
-
const cookies:
|
| 335 |
const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
|
| 336 |
if (Array.isArray(setCookieHeaders)) {
|
| 337 |
for (const setCookie of setCookieHeaders) {
|
| 338 |
cookies.push({
|
| 339 |
-
...parseSetCookieString(setCookie, { decodeValues:
|
| 340 |
});
|
| 341 |
}
|
| 342 |
} else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
|
| 343 |
cookies.push({
|
| 344 |
-
...parseSetCookieString(setCookieHeaders, { decodeValues:
|
| 345 |
});
|
| 346 |
}
|
| 347 |
instance.setCookies = cookies;
|
|
|
|
| 1 |
import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
|
| 2 |
import type { Request, Response } from 'express';
|
| 3 |
+
import { Cookie, parseString as parseSetCookieString } from 'set-cookie-parser';
|
|
|
|
| 4 |
|
| 5 |
export enum CONTENT_FORMAT {
|
| 6 |
CONTENT = 'content',
|
|
|
|
| 217 |
@Prop({
|
| 218 |
arrayOf: String,
|
| 219 |
})
|
| 220 |
+
setCookies?: Cookie[];
|
| 221 |
|
| 222 |
@Prop()
|
| 223 |
proxyUrl?: string;
|
|
|
|
| 330 |
instance.timeout ??= null;
|
| 331 |
}
|
| 332 |
|
| 333 |
+
const cookies: Cookie[] = [];
|
| 334 |
const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
|
| 335 |
if (Array.isArray(setCookieHeaders)) {
|
| 336 |
for (const setCookie of setCookieHeaders) {
|
| 337 |
cookies.push({
|
| 338 |
+
...parseSetCookieString(setCookie, { decodeValues: true }),
|
| 339 |
});
|
| 340 |
}
|
| 341 |
} else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
|
| 342 |
cookies.push({
|
| 343 |
+
...parseSetCookieString(setCookieHeaders, { decodeValues: true }),
|
| 344 |
});
|
| 345 |
}
|
| 346 |
instance.setCookies = cookies;
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
import os from 'os';
|
| 2 |
import fs from 'fs';
|
| 3 |
import { container, singleton } from 'tsyringe';
|
| 4 |
-
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick } from 'civkit';
|
| 5 |
import { Logger } from '../shared/services/logger';
|
| 6 |
|
| 7 |
import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page } from 'puppeteer';
|
|
|
|
| 8 |
import puppeteer from 'puppeteer-extra';
|
| 9 |
|
| 10 |
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
|
@@ -67,7 +68,7 @@ export interface ExtendedSnapshot extends PageSnapshot {
|
|
| 67 |
|
| 68 |
export interface ScrappingOptions {
|
| 69 |
proxyUrl?: string;
|
| 70 |
-
cookies?:
|
| 71 |
favorScreenshot?: boolean;
|
| 72 |
waitForSelector?: string | string[];
|
| 73 |
minIntervalMs?: number;
|
|
@@ -817,13 +818,33 @@ export class PuppeteerControl extends AsyncService {
|
|
| 817 |
}
|
| 818 |
if (options?.cookies) {
|
| 819 |
const mapped = options.cookies.map((x) => {
|
| 820 |
-
|
| 821 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 822 |
}
|
| 823 |
|
| 824 |
-
return
|
| 825 |
});
|
| 826 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 827 |
}
|
| 828 |
if (options?.overrideUserAgent) {
|
| 829 |
await page.setUserAgent(options.overrideUserAgent);
|
|
|
|
| 1 |
import os from 'os';
|
| 2 |
import fs from 'fs';
|
| 3 |
import { container, singleton } from 'tsyringe';
|
| 4 |
+
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick, ParamValidationError } from 'civkit';
|
| 5 |
import { Logger } from '../shared/services/logger';
|
| 6 |
|
| 7 |
import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page } from 'puppeteer';
|
| 8 |
+
import type { Cookie } from 'set-cookie-parser';
|
| 9 |
import puppeteer from 'puppeteer-extra';
|
| 10 |
|
| 11 |
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
|
|
|
| 68 |
|
| 69 |
export interface ScrappingOptions {
|
| 70 |
proxyUrl?: string;
|
| 71 |
+
cookies?: Cookie[];
|
| 72 |
favorScreenshot?: boolean;
|
| 73 |
waitForSelector?: string | string[];
|
| 74 |
minIntervalMs?: number;
|
|
|
|
| 818 |
}
|
| 819 |
if (options?.cookies) {
|
| 820 |
const mapped = options.cookies.map((x) => {
|
| 821 |
+
const draft: CookieParam = {
|
| 822 |
+
name: x.name,
|
| 823 |
+
value: encodeURIComponent(x.value),
|
| 824 |
+
secure: x.secure,
|
| 825 |
+
domain: x.domain,
|
| 826 |
+
path: x.path,
|
| 827 |
+
expires: x.expires ? Math.floor(x.expires.valueOf() / 1000) : undefined,
|
| 828 |
+
sameSite: x.sameSite as any,
|
| 829 |
+
};
|
| 830 |
+
if (!draft.expires && x.maxAge) {
|
| 831 |
+
draft.expires = Math.floor(Date.now() / 1000) + x.maxAge;
|
| 832 |
+
}
|
| 833 |
+
if (!draft.domain) {
|
| 834 |
+
draft.url = parsedUrl.toString();
|
| 835 |
}
|
| 836 |
|
| 837 |
+
return draft;
|
| 838 |
});
|
| 839 |
+
try {
|
| 840 |
+
await page.setCookie(...mapped);
|
| 841 |
+
} catch (err: any) {
|
| 842 |
+
this.logger.warn(`Page ${sn}: Failed to set cookies`, { err: marshalErrorLike(err) });
|
| 843 |
+
throw new ParamValidationError({
|
| 844 |
+
path: 'cookies',
|
| 845 |
+
message: `Failed to set cookies: ${err?.message}`
|
| 846 |
+
});
|
| 847 |
+
}
|
| 848 |
}
|
| 849 |
if (options?.overrideUserAgent) {
|
| 850 |
await page.setUserAgent(options.overrideUserAgent);
|