// web_reader / build / dto / crawler-options.js
// Mohammad Shahid
// Include pre-built files for HF deployment
// f316cce
"use strict";
/**
 * Compiler-emitted helper that applies a list of decorators to a class or class member.
 *
 * Reuses `this.__decorate` when one is already present on `this`; otherwise defines it here.
 *
 * @param decorators Array of decorator functions; falsy entries are skipped.
 * @param target     The class (when called with 2 arguments) or the object owning the member.
 * @param key        Member name; absent for class decoration.
 * @param desc       Property descriptor; `null` means "look up the own descriptor of `key`".
 * @returns The last truthy decorator result, or the initial value when no decorator returned one.
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
// `c` is the argument count and selects the calling convention:
//   c < 3   -> class decoration, `r` starts as the class itself
//   c === 3 -> member decoration without a descriptor (`r` starts as `desc`, i.e. undefined)
//   c > 3   -> member decoration with a descriptor (`null` triggers a descriptor lookup)
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
// Delegate to Reflect.decorate when the runtime provides it.
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
// Otherwise apply decorators from last to first; a falsy return keeps the previous `r`.
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
// Only the descriptor form (c > 3) writes the result back onto the target; `r` is always returned.
return c > 3 && r && Object.defineProperty(target, key, r), r;
};
/**
 * Compiler-emitted helper producing a metadata decorator.
 *
 * Reuses `this.__metadata` when available. Otherwise forwards to `Reflect.metadata`
 * if the runtime defines it, and yields `undefined` when it does not.
 *
 * @param metadataKey   Metadata key, e.g. "design:type".
 * @param metadataValue Metadata value to associate with the key.
 * @returns Whatever `Reflect.metadata` returns, or `undefined` when it is unavailable.
 */
var __metadata = (this && this.__metadata) || function (metadataKey, metadataValue) {
    const reflectHasMetadata = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    return reflectHasMetadata ? Reflect.metadata(metadataKey, metadataValue) : undefined;
};
// Compiler-emitted temporary; it is not referenced anywhere in the visible code.
var _a;
// Flag the exports object with `__esModule` (the convention used for ES-module interop).
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every export as undefined; the actual values are assigned further down the file.
exports.CrawlerOptionsHeaderOnly = exports.CrawlerOptions = exports.BASE_URL_MODES = exports.IMAGE_RETENTION_MODES = exports.RESPOND_TIMING = exports.ENGINE_TYPE = exports.CONTENT_FORMAT = void 0;
const civ_rpc_1 = require("civkit/civ-rpc");
const fancy_file_1 = require("civkit/fancy-file");
const set_cookie_parser_1 = require("set-cookie-parser");
const turndown_tweakable_options_1 = require("./turndown-tweakable-options");
/**
 * Response formats that can be requested through `respondWith`.
 * The object is also published as `exports.CONTENT_FORMAT`.
 */
var CONTENT_FORMAT;
(function (formats) {
    Object.assign(formats, {
        CONTENT: "content",
        MARKDOWN: "markdown",
        HTML: "html",
        TEXT: "text",
        PAGESHOT: "pageshot",
        SCREENSHOT: "screenshot",
        VLM: "vlm",
        READER_LM: "readerlm-v2",
    });
})(CONTENT_FORMAT || (exports.CONTENT_FORMAT = CONTENT_FORMAT = {}));
/**
 * Crawling engine identifiers.
 * The object is also published as `exports.ENGINE_TYPE`.
 */
var ENGINE_TYPE;
(function (engines) {
    Object.assign(engines, {
        AUTO: "auto",
        BROWSER: "browser",
        CURL: "curl",
        CF_BROWSER_RENDERING: "cf-browser-rendering",
    });
})(ENGINE_TYPE || (exports.ENGINE_TYPE = ENGINE_TYPE = {}));
/**
 * Points in the page lifecycle at which a response may be produced.
 * The object is also published as `exports.RESPOND_TIMING`.
 */
var RESPOND_TIMING;
(function (timings) {
    Object.assign(timings, {
        HTML: "html",
        VISIBLE_CONTENT: "visible-content",
        MUTATION_IDLE: "mutation-idle",
        RESOURCE_IDLE: "resource-idle",
        MEDIA_IDLE: "media-idle",
        NETWORK_IDLE: "network-idle",
    });
})(RESPOND_TIMING || (exports.RESPOND_TIMING = RESPOND_TIMING = {}));
// Set of the plain (single) content-format strings; used to detect compound `respondWith` values.
const CONTENT_FORMAT_VALUES = new Set(Object.values(CONTENT_FORMAT));
// Accepted values for `retainImages` (exported list plus a Set for membership checks).
exports.IMAGE_RETENTION_MODES = ['none', 'all', 'alt', 'all_p', 'alt_p'];
const IMAGE_RETENTION_MODE_VALUES = new Set(exports.IMAGE_RETENTION_MODES);
// Accepted values for `base` (exported list plus a Set used as the property's declared type).
exports.BASE_URL_MODES = ['initial', 'final'];
const BASE_URL_MODE_VALUES = new Set(exports.BASE_URL_MODES);
/**
 * Viewport settings accepted as the `viewport` option.
 * Fields are declared through the property decorators registered below.
 */
class Viewport extends civ_rpc_1.AutoCastable {
}
// Register each field as [name, design type, arguments passed to Prop], in declaration order.
// `width` and `height` default to 1024; the remaining fields are declared without options.
for (const [fieldName, designType, propArgs] of [
    ["width", Number, [{
        default: 1024
    }]],
    ["height", Number, [{
        default: 1024
    }]],
    ["deviceScaleFactor", Number, []],
    ["isMobile", Boolean, []],
    ["isLandscape", Boolean, []],
    ["hasTouch", Boolean, []],
]) {
    __decorate([
        (0, civ_rpc_1.Prop)(...propArgs),
        __metadata("design:type", designType)
    ], Viewport.prototype, fieldName, void 0);
}
/**
 * Per-request crawling options.
 *
 * Instances are built with {@link CrawlerOptions.from}, which casts the raw input and then
 * layers settings taken from request headers on top of it. The instance methods answer
 * questions about the resulting configuration (respond timing, cache use, browser need).
 */
let CrawlerOptions = class CrawlerOptions extends civ_rpc_1.AutoCastable {
    /**
     * Casts `input` into an instance and applies header-derived settings.
     *
     * Headers are read through the object stored on `input` under `RPC_CALL_ENVIRONMENT`
     * (presumably the request context; only its `get(name)` method is used here).
     * Headers applied with a plain assignment overwrite the casted value, while headers
     * applied with `??=` only fill in values the input left unset.
     *
     * @param {object} input Raw input carrying the option fields and the call environment.
     * @returns {CrawlerOptions} The populated instance. `setCookies` is replaced by an array
     *   of parsed cookie objects and a truthy `cacheTolerance` is multiplied by 1000.
     * @throws {ParamValidationError} When an LM format is combined with content/markdown.
     */
    static from(input) {
        const instance = super.from(input);
        const ctx = Reflect.get(input, civ_rpc_1.RPC_CALL_ENVIRONMENT);

        // Response format: the header wins over the casted value; normalised to lower case.
        const customMode = ctx?.get('x-respond-with') || ctx?.get('x-return-format');
        if (customMode) {
            instance.respondWith = customMode;
        }
        if (instance.respondWith) {
            instance.respondWith = instance.respondWith.toLowerCase();
        }
        if (instance.respondWith?.includes('lm')) {
            if (instance.respondWith.includes('content') || instance.respondWith.includes('markdown')) {
                throw new civ_rpc_1.ParamValidationError({
                    path: 'respondWith',
                    message: `LM formats conflicts with content/markdown.`,
                });
            }
        }

        const locale = ctx?.get('x-locale');
        if (locale) {
            instance.locale = locale;
        }
        const referer = ctx?.get('x-referer');
        if (referer) {
            instance.referer = referer;
        }

        // Flag headers: any non-empty value enables the flag.
        const withGeneratedAlt = ctx?.get('x-with-generated-alt');
        if (withGeneratedAlt) {
            instance.withGeneratedAlt = Boolean(withGeneratedAlt);
        }
        const withLinksSummary = ctx?.get('x-with-links-summary');
        if (withLinksSummary) {
            if (withLinksSummary === 'all') {
                instance.withLinksSummary = withLinksSummary;
            }
            else {
                instance.withLinksSummary = Boolean(withLinksSummary);
            }
        }
        const withImagesSummary = ctx?.get('x-with-images-summary');
        if (withImagesSummary) {
            instance.withImagesSummary = Boolean(withImagesSummary);
        }

        // Only known retention modes are accepted from the header.
        const retainImages = ctx?.get('x-retain-images');
        if (retainImages && IMAGE_RETENTION_MODE_VALUES.has(retainImages)) {
            instance.retainImages = retainImages;
        }
        // Generated alt text forces the 'all_p' retention mode, whatever was requested.
        if (instance.withGeneratedAlt) {
            instance.retainImages = 'all_p';
        }

        // Cache control: no-cache implies a zero tolerance unless one was given explicitly;
        // an integer x-cache-tolerance header overrides either.
        const noCache = ctx?.get('x-no-cache');
        if (noCache) {
            instance.noCache = Boolean(noCache);
        }
        if (instance.noCache && instance.cacheTolerance === undefined) {
            instance.cacheTolerance = 0;
        }
        const cacheTolerance = parseInt(ctx?.get('x-cache-tolerance') || '', 10);
        if (!Number.isNaN(cacheTolerance)) {
            instance.cacheTolerance = cacheTolerance;
        }

        const noGfm = ctx?.get('x-no-gfm');
        if (noGfm) {
            instance.noGfm = noGfm === 'table' ? noGfm : Boolean(noGfm);
        }

        // Timeout: positive values are capped at 180; any other non-empty header resets it to null.
        const timeoutSeconds = parseInt(ctx?.get('x-timeout') || '', 10);
        if (!Number.isNaN(timeoutSeconds) && timeoutSeconds > 0) {
            instance.timeout = timeoutSeconds <= 180 ? timeoutSeconds : 180;
        }
        else if (ctx?.get('x-timeout')) {
            instance.timeout = null;
        }

        // Selector lists: headers are split on ', ' and only used when the input left them unset.
        const removeSelector = ctx?.get('x-remove-selector')?.split(', ').filter(Boolean);
        instance.removeSelector ??= removeSelector?.length ? removeSelector : undefined;
        const targetSelector = ctx?.get('x-target-selector')?.split(', ').filter(Boolean);
        instance.targetSelector ??= targetSelector?.length ? targetSelector : undefined;
        // Without an explicit wait selector, the target selector doubles as the wait selector.
        const waitForSelector = ctx?.get('x-wait-for-selector')?.split(', ').filter(Boolean);
        instance.waitForSelector ??= (waitForSelector?.length ? waitForSelector : undefined) || instance.targetSelector;

        const overrideUserAgent = ctx?.get('x-user-agent') || undefined;
        instance.userAgent ??= overrideUserAgent;

        // Engine: 'vlm' and 'readerlm-v2' are shorthands that also select the response format.
        const engine = ctx?.get('x-engine');
        if (engine) {
            instance.engine = engine;
        }
        if (instance.engine) {
            instance.engine = instance.engine.toLowerCase();
        }
        if (instance.engine === 'vlm') {
            instance.engine = ENGINE_TYPE.BROWSER;
            instance.respondWith = CONTENT_FORMAT.VLM;
        }
        else if (instance.engine === 'readerlm-v2') {
            instance.engine = ENGINE_TYPE.AUTO;
            instance.respondWith = CONTENT_FORMAT.READER_LM;
        }

        const keepImgDataUrl = ctx?.get('x-keep-img-data-url');
        if (keepImgDataUrl) {
            instance.keepImgDataUrl = Boolean(keepImgDataUrl);
        }

        // Iframe / shadow-DOM expansion turn an unset (undefined) timeout into an explicit null.
        const withIframe = ctx?.get('x-with-iframe');
        if (withIframe) {
            instance.withIframe = withIframe.toLowerCase() === 'quoted' ? 'quoted' : Boolean(withIframe);
        }
        if (instance.withIframe) {
            instance.timeout ??= null;
        }
        const withShadowDom = ctx?.get('x-with-shadow-dom');
        if (withShadowDom) {
            instance.withShadowDom = Boolean(withShadowDom);
        }
        if (instance.withShadowDom) {
            instance.timeout ??= null;
        }

        // Cookies: a header value takes precedence over `setCookies` from the input
        // (`split` always yields an array, so any string header value selects the header path).
        // The source is inspected BEFORE filtering so that a missing header combined with
        // an unset or string-valued `setCookies` no longer throws on `.filter`.
        const cookies = [];
        const cookieSource = ctx?.get('x-set-cookie')?.split(', ') || instance.setCookies;
        if (Array.isArray(cookieSource)) {
            for (const setCookie of cookieSource.filter(Boolean)) {
                cookies.push({
                    ...(0, set_cookie_parser_1.parseString)(setCookie, { decodeValues: true }),
                });
            }
        }
        else if (cookieSource && typeof cookieSource === 'string') {
            cookies.push({
                ...(0, set_cookie_parser_1.parseString)(cookieSource, { decodeValues: true }),
            });
        }
        instance.setCookies = cookies;

        const proxyUrl = ctx?.get('x-proxy-url');
        instance.proxyUrl ??= proxyUrl || undefined;
        const proxy = ctx?.get('x-proxy');
        instance.proxy ??= proxy || undefined;
        const robotsTxt = ctx?.get('x-robots-txt');
        instance.robotsTxt ??= robotsTxt || undefined;
        const tokenBudget = ctx?.get('x-token-budget');
        instance.tokenBudget ??= parseInt(tokenBudget || '', 10) || undefined;

        // NOTE(review): unlike x-retain-images, the x-base value is not checked against
        // BASE_URL_MODE_VALUES here; confirm whether that is intentional.
        const baseMode = ctx?.get('x-base');
        if (baseMode) {
            instance.base = baseMode;
        }

        const dnt = ctx?.get('dnt');
        instance.doNotTrack ??= (parseInt(dnt || '', 10) || null);

        const respondTiming = ctx?.get('x-respond-timing');
        if (respondTiming) {
            instance.respondTiming ??= respondTiming;
        }

        // The X-Cache-Tolerance header is documented in seconds; scale the stored value by 1000.
        if (instance.cacheTolerance) {
            instance.cacheTolerance = instance.cacheTolerance * 1000;
        }

        if (ctx) {
            instance.markdown ??= turndown_tweakable_options_1.TurnDownTweakableOptions.fromCtx(ctx);
        }
        return instance;
    }

    /**
     * The respond timing to use: the explicit `respondTiming` when set, otherwise derived
     * from `timeout` (>= 20 selects network-idle) and `respondWith` (formats containing
     * 'shot' or 'vlm' select media-idle), falling back to resource-idle.
     */
    get presumedRespondTiming() {
        if (this.respondTiming) {
            return this.respondTiming;
        }
        if (this.timeout && this.timeout >= 20) {
            return RESPOND_TIMING.NETWORK_IDLE;
        }
        if (this.respondWith.includes('shot') || this.respondWith.includes('vlm')) {
            return RESPOND_TIMING.MEDIA_IDLE;
        }
        return RESPOND_TIMING.RESOURCE_IDLE;
    }

    /**
     * Decides whether `snapshot` may be returned before the page has fully finished.
     * The checks are order-sensitive: earlier rules take priority over later ones.
     *
     * @param {object} snapshot Page snapshot; the fields read here are
     *   `lastMediaResourceLoaded`, `lastContentResourceLoaded`, `lastMutationIdle`,
     *   `pageshot`, `screenshot`, `parsed`, `html` and `isIntermediate`.
     * @returns {boolean} `true` when the snapshot satisfies the presumed respond timing.
     */
    isSnapshotAcceptableForEarlyResponse(snapshot) {
        // An explicit wait selector always rules out an early response.
        if (this.waitForSelector?.length) {
            return false;
        }
        const presumedTiming = this.presumedRespondTiming;
        // media-idle: accept once the latest media/content resource load is more than 500ms old.
        if (presumedTiming === RESPOND_TIMING.MEDIA_IDLE && snapshot.lastMediaResourceLoaded && snapshot.lastMutationIdle) {
            const now = Date.now();
            if ((Math.max(snapshot.lastMediaResourceLoaded, snapshot.lastContentResourceLoaded || 0) + 500) < now) {
                return true;
            }
        }
        // Image-based formats need the corresponding capture to be present.
        if ((this.respondWith.includes('vlm') || this.respondWith.includes('pageshot')) && !snapshot.pageshot) {
            return false;
        }
        if ((this.respondWith.includes('vlm') || this.respondWith.includes('screenshot')) && !snapshot.screenshot) {
            return false;
        }
        // resource-idle: accept once the latest content resource load is more than 500ms old.
        if (presumedTiming === RESPOND_TIMING.RESOURCE_IDLE && snapshot.lastContentResourceLoaded && snapshot.lastMutationIdle) {
            const now = Date.now();
            if ((snapshot.lastContentResourceLoaded + 500) < now) {
                return true;
            }
        }
        if (this.injectFrameScript?.length || this.injectPageScript?.length) {
            return false;
        }
        if (presumedTiming === RESPOND_TIMING.VISIBLE_CONTENT && snapshot.parsed?.content) {
            return true;
        }
        if (presumedTiming === RESPOND_TIMING.HTML && snapshot.html) {
            return true;
        }
        if (presumedTiming === RESPOND_TIMING.NETWORK_IDLE) {
            return false;
        }
        if (presumedTiming === RESPOND_TIMING.MUTATION_IDLE && snapshot.lastMutationIdle) {
            return true;
        }
        if (this.respondWith.includes('lm')) {
            return false;
        }
        if (this.withIframe) {
            return false;
        }
        return !snapshot.isIntermediate;
    }

    /**
     * Whether a cache lookup may be attempted for these options.
     *
     * @returns {boolean} `false` when caching is disabled (`noCache` or zero tolerance) or when
     *   cookies, injected scripts or a custom viewport are configured; `true` otherwise.
     */
    isCacheQueryApplicable() {
        if (this.noCache) {
            return false;
        }
        if (this.cacheTolerance === 0) {
            return false;
        }
        if (this.setCookies?.length) {
            return false;
        }
        if (this.injectFrameScript?.length || this.injectPageScript?.length) {
            return false;
        }
        if (this.viewport) {
            return false;
        }
        return true;
    }

    /**
     * @returns {boolean} `true` when `respondWith` is not exactly one of the CONTENT_FORMAT values.
     */
    isRequestingCompoundContentFormat() {
        return !CONTENT_FORMAT_VALUES.has(this.respondWith);
    }

    /**
     * Whether none of the configured options calls for a browser.
     *
     * @returns {boolean} `false` as soon as one of these is configured: a respond timing other
     *   than html/visible-content, a pageshot/screenshot format, injected scripts, a wait
     *   selector, iframe or shadow-DOM expansion, a viewport, a pdf or inline html; `true` otherwise.
     */
    browserIsNotRequired() {
        if (this.respondTiming && ![RESPOND_TIMING.HTML, RESPOND_TIMING.VISIBLE_CONTENT].includes(this.respondTiming)) {
            return false;
        }
        if (this.respondWith.includes(CONTENT_FORMAT.PAGESHOT) || this.respondWith.includes(CONTENT_FORMAT.SCREENSHOT)) {
            return false;
        }
        if (this.injectFrameScript?.length || this.injectPageScript?.length) {
            return false;
        }
        if (this.waitForSelector?.length) {
            return false;
        }
        if (this.withIframe || this.withShadowDom) {
            return false;
        }
        if (this.viewport) {
            return false;
        }
        if (this.pdf) {
            return false;
        }
        if (this.html) {
            return false;
        }
        return true;
    }
};
exports.CrawlerOptions = CrawlerOptions;
// Property registrations for CrawlerOptions, applied in declaration order.
// Each row is [field name, "design:type" metadata, arguments passed to Prop];
// an empty argument list means Prop is called without options.
for (const [fieldName, designType, propArgs] of [
    ["url", String, []],
    ["html", String, []],
    ["base", Object, [{
        type: BASE_URL_MODE_VALUES,
        default: 'initial',
    }]],
    ["pdf", Object, [{
        desc: 'Base64 encoded PDF.',
        type: [fancy_file_1.FancyFile, String]
    }]],
    ["respondWith", String, [{
        default: CONTENT_FORMAT.CONTENT,
        type: [CONTENT_FORMAT, String]
    }]],
    ["withGeneratedAlt", Boolean, [{
        default: false,
    }]],
    ["retainImages", Object, [{ default: 'all', type: IMAGE_RETENTION_MODE_VALUES }]],
    ["withLinksSummary", Object, [{
        default: false,
    }]],
    ["withImagesSummary", Boolean, [{
        default: false,
    }]],
    ["noCache", Boolean, [{
        default: false,
    }]],
    ["noGfm", Object, [{
        default: false,
    }]],
    ["cacheTolerance", Number, []],
    ["targetSelector", Object, [{ arrayOf: String }]],
    ["waitForSelector", Object, [{ arrayOf: String }]],
    ["removeSelector", Object, [{ arrayOf: String }]],
    ["keepImgDataUrl", Boolean, [{
        default: false,
    }]],
    ["withIframe", Object, [{
        default: false,
        type: [String, Boolean]
    }]],
    ["withShadowDom", Boolean, [{
        default: false,
    }]],
    ["setCookies", Array, [{
        arrayOf: String,
    }]],
    ["proxyUrl", String, []],
    ["proxy", String, []],
    ["userAgent", String, []],
    ["engine", String, []],
    ["injectPageScript", Array, [{
        arrayOf: String,
    }]],
    ["injectFrameScript", Array, [{
        arrayOf: String,
    }]],
    ["timeout", Object, [{
        validate: (v) => v > 0 && v <= 180,
        type: Number,
        nullable: true,
    }]],
    ["locale", String, []],
    ["referer", String, []],
    ["tokenBudget", Number, []],
    ["viewport", Viewport, []],
    ["instruction", String, []],
    ["jsonSchema", Object, []],
    ["robotsTxt", String, []],
    ["doNotTrack", Object, []],
    ["markdown", turndown_tweakable_options_1.TurnDownTweakableOptions, []],
    ["respondTiming", String, [{
        type: RESPOND_TIMING,
    }]],
]) {
    __decorate([
        (0, civ_rpc_1.Prop)(...propArgs),
        __metadata("design:type", designType)
    ], CrawlerOptions.prototype, fieldName, void 0);
}
// Apply the class-level `Also` decorator and re-bind/re-export the (possibly replaced) class.
// The decorator payload is OpenAPI metadata describing the request headers callers may send;
// most of these headers are the ones read in `CrawlerOptions.from`.
exports.CrawlerOptions = CrawlerOptions = __decorate([
(0, civ_rpc_1.Also)({
openapi: {
operation: {
parameters: {
// --- Response negotiation and caching ---
'Accept': {
description: `Specifies your preference for the response format.\n\n` +
`Supported formats: \n` +
`- text/event-stream\n` +
`- application/json or text/json\n` +
`- text/plain`,
in: 'header',
schema: { type: 'string' }
},
'X-Cache-Tolerance': {
description: `Sets internal cache tolerance in seconds if this header is specified with a integer.`,
in: 'header',
schema: { type: 'string' }
},
'X-No-Cache': {
description: `Ignores internal cache if this header is specified with a value.\n\nEquivalent to X-Cache-Tolerance: 0`,
in: 'header',
schema: { type: 'string' }
},
'X-Respond-With': {
description: `Specifies the (non-default) form of the crawled data you prefer.\n\n` +
`Supported formats: \n` +
`- markdown\n` +
`- html\n` +
`- text\n` +
`- pageshot\n` +
`- screenshot\n` +
`- content\n` +
`- any combination of the above\n` +
`- readerlm-v2\n` +
`- vlm\n\n` +
`Default: content\n`,
in: 'header',
schema: { type: 'string' }
},
// --- Selectors ---
'X-Wait-For-Selector': {
description: `Specifies a CSS selector to wait for the appearance of such an element before returning.\n\n` +
'Example: `X-Wait-For-Selector: .content-block`\n',
in: 'header',
schema: { type: 'string' }
},
'X-Target-Selector': {
description: `Specifies a CSS selector for return target instead of the full html.\n\n` +
'Implies `X-Wait-For-Selector: (same selector)`',
in: 'header',
schema: { type: 'string' }
},
'X-Remove-Selector': {
description: `Specifies a CSS selector to remove elements from the full html.\n\n` +
'Example `X-Remove-Selector: nav`',
in: 'header',
schema: { type: 'string' }
},
'X-Keep-Img-Data-Url': {
description: `Keep data-url as it instead of transforming them to object-url. (Only applicable when targeting markdown format)\n\n` +
'Example `X-Keep-Img-Data-Url: true`',
in: 'header',
schema: { type: 'string' }
},
// --- Network, proxy and tracking ---
'X-Proxy-Url': {
description: `Specifies your custom proxy if you prefer to use one.\n\n` +
`Supported protocols: \n` +
`- http\n` +
`- https\n` +
`- socks4\n` +
`- socks5\n\n` +
`For authentication, https://user:pass@host:port`,
in: 'header',
schema: { type: 'string' }
},
'X-Proxy': {
description: `Use a proxy server provided by us.\n\nOptionally specify two-letter country code.`,
in: 'header',
schema: { type: 'string' }
},
'X-Robots-Txt': {
description: `Load and conform to the respective robot.txt on the target origin.\n\nOptionally specify a bot UA to check against.\n\n`,
in: 'header',
schema: { type: 'string' }
},
'DNT': {
description: `When set to 1, prevent the result of this request to be cached in the system.\n\n`,
in: 'header',
schema: { type: 'string' }
},
'X-Set-Cookie': {
description: `Sets cookie(s) to the headless browser for your request. \n\n` +
`Syntax is the same with standard Set-Cookie`,
in: 'header',
schema: { type: 'string' }
},
// --- Content enrichment ---
'X-With-Generated-Alt': {
description: `Enable automatic alt-text generating for images without an meaningful alt-text.\n\n` +
`Note: Does not work when \`X-Respond-With\` is specified`,
in: 'header',
schema: { type: 'string' }
},
'X-With-Images-Summary': {
description: `Enable dedicated summary section for images on the page.`,
in: 'header',
schema: { type: 'string' }
},
// NOTE(review): this key spells "links" in lower case, unlike the other header names.
// `CrawlerOptions.from` additionally treats the value `all` specially for this header.
'X-With-links-Summary': {
description: `Enable dedicated summary section for hyper links on the page.`,
in: 'header',
schema: { type: 'string' }
},
'X-Retain-Images': {
description: `Image retention modes.\n\n` +
`Supported modes: \n` +
`- all: all images\n` +
`- none: no images\n` +
`- alt: only alt text\n` +
`- all_p: all images and with generated alt text\n` +
`- alt_p: only alt text and with generated alt\n\n`,
in: 'header',
schema: { type: 'string' }
},
'X-With-Iframe': {
description: `Enable filling iframe contents into main. (violates standards)`,
in: 'header',
schema: { type: 'string' }
},
'X-With-Shadow-Dom': {
description: `Enable filling shadow dom contents into main. (violates standards)`,
in: 'header',
schema: { type: 'string' }
},
// --- Browser / request behaviour ---
'X-User-Agent': {
description: `Override User-Agent.`,
in: 'header',
schema: { type: 'string' }
},
'X-Timeout': {
description: `Specify timeout in seconds. Max 180.`,
in: 'header',
schema: { type: 'string' }
},
'X-Locale': {
description: 'Specify browser locale for the page.',
in: 'header',
schema: { type: 'string' }
},
'X-Referer': {
description: 'Specify referer for the page.',
in: 'header',
schema: { type: 'string' }
},
'X-Token-Budget': {
description: 'Specify a budget in tokens.\n\nIf the resulting token cost exceeds the budget, the request is rejected.',
in: 'header',
schema: { type: 'string' }
},
'X-Respond-Timing': {
description: `Explicitly specify the respond timing. One of the following:\n\n` +
`- html: directly return unrendered HTML\n` +
`- visible-content: return immediately when any content becomes available\n` +
`- mutation-idle: wait for DOM mutations to settle and remain unchanged for at least 0.2s\n` +
`- resource-idle: wait for no additional resources that would affect page logic and content has SUCCEEDED loading in 0.5s\n` +
`- media-idle: wait for no additional resources, including media resources, has SUCCEEDED loading in 0.5s\n` +
`- network-idle: wait for full load of webpage, also known as networkidle0.\n\n`,
in: 'header',
schema: { type: 'string' }
},
// NOTE(review): the description lists "direct", but ENGINE_TYPE in this file defines
// auto, browser, curl and cf-browser-rendering; confirm which values are actually supported.
'X-Engine': {
description: 'Specify the engine to use for crawling.\n\nSupported: browser, direct, cf-browser-rendering',
in: 'header',
schema: { type: 'string' }
},
'X-Base': {
description: 'Select base modes of relative URLs.\n\nSupported: initial, final',
in: 'header',
schema: { type: 'string' }
},
// --- Markdown (Turndown) tweaks ---
// These headers are not read in this file; presumably they are consumed by
// TurnDownTweakableOptions.fromCtx. TODO confirm.
'X-Md-Heading-Style': {
description: 'Heading style of the generated markdown.\n\nThis is an option passed through to [Turndown](https://github.com/mixmark-io/turndown?tab=readme-ov-file#options).\n\nSupported: setext, atx',
in: 'header',
schema: { type: 'string' }
},
'X-Md-Hr': {
description: 'Hr text of the generated markdown.\n\nThis is an option passed through to [Turndown](https://github.com/mixmark-io/turndown?tab=readme-ov-file#options).',
in: 'header',
schema: { type: 'string' }
},
'X-Md-Bullet-List-Marker': {
description: 'Bullet list marker of the generated markdown.\n\nThis is an option passed through to [Turndown](https://github.com/mixmark-io/turndown?tab=readme-ov-file#options).\n\nSupported: -, +, *',
in: 'header',
schema: { type: 'string' }
},
'X-Md-Em-Delimiter': {
description: 'Em delimiter of the generated markdown.\n\nThis is an option passed through to [Turndown](https://github.com/mixmark-io/turndown?tab=readme-ov-file#options).\n\nSupported: _, *',
in: 'header',
schema: { type: 'string' }
},
'X-Md-Strong-Delimiter': {
description: 'Strong delimiter of the generated markdown.\n\nThis is an option passed through to [Turndown](https://github.com/mixmark-io/turndown?tab=readme-ov-file#options).\n\nSupported: **, __',
in: 'header',
schema: { type: 'string' }
},
'X-Md-Link-Style': {
description: 'Link style of the generated markdown.\n\nThis is an option passed through to [Turndown](https://github.com/mixmark-io/turndown?tab=readme-ov-file#options).\n\nSupported: inlined, referenced, discarded',
in: 'header',
schema: { type: 'string' }
},
'X-Md-Link-Reference-Style': {
description: 'Link reference style of the generated markdown.\n\nThis is an option passed through to [Turndown](https://github.com/mixmark-io/turndown?tab=readme-ov-file#options).\n\nSupported: full, collapsed, shortcut, discarded',
in: 'header',
schema: { type: 'string' }
},
}
}
}
})
], CrawlerOptions);
/**
 * Variant of CrawlerOptions that ignores every field of the input itself:
 * only the call environment is forwarded, so options come from headers alone.
 */
class CrawlerOptionsHeaderOnly extends CrawlerOptions {
    /**
     * @param {object} input Raw input; only its `RPC_CALL_ENVIRONMENT` entry is used.
     * @returns {CrawlerOptions} Instance built by `CrawlerOptions.from` from that entry only.
     */
    static from(input) {
        const callEnvironment = Reflect.get(input, civ_rpc_1.RPC_CALL_ENVIRONMENT);
        const headerOnlyInput = { [civ_rpc_1.RPC_CALL_ENVIRONMENT]: callEnvironment };
        return super.from(headerOnlyInput);
    }
}
exports.CrawlerOptionsHeaderOnly = CrawlerOptionsHeaderOnly;
//# sourceMappingURL=crawler-options.js.map