Spaces:

Echo-AI-official
/

Fire-crawl

Paused

App Files Files Community

Fire-crawl / openapi-v0.json

Echo-AI-official

Upload 280 files

0e759d2 verified 9 months ago

raw

history blame contribute delete

33 kB

	{
	"openapi": "3.0.0",
	"info": {
	"title": "Firecrawl API",
	"version": "0.0.0",
	"description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.",
	"contact": {
	"name": "Firecrawl Support",
	"url": "https://firecrawl.dev/support",
	"email": "support@firecrawl.dev"
	}
	},
	"servers": [
	{
	"url": "https://api.firecrawl.dev/v0"
	}
	],
	"paths": {
	"/scrape": {
	"post": {
	"summary": "Scrape a single URL and optionally extract information using an LLM",
	"operationId": "scrapeAndExtractFromUrl",
	"tags": ["Scraping"],
	"security": [
	{
	"bearerAuth": []
	}
	],
	"requestBody": {
	"required": true,
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"url": {
	"type": "string",
	"format": "uri",
	"description": "The URL to scrape"
	},
	"pageOptions": {
	"type": "object",
	"properties": {
	"headers": {
	"type": "object",
	"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
	},
	"includeHtml": {
	"type": "boolean",
	"description": "Include the HTML version of the content on page. Will output a html key in the response.",
	"default": false
	},
	"includeRawHtml": {
	"type": "boolean",
	"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
	"default": false
	},
	"onlyIncludeTags": {
	"type": "array",
	"items": {
	"type": "string"
	},
	"description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
	},
	"onlyMainContent": {
	"type": "boolean",
	"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
	"default": false
	},
	"removeTags": {
	"type": "array",
	"items": {
	"type": "string"
	},
	"description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
	},
	"replaceAllPathsWithAbsolutePaths": {
	"type": "boolean",
	"description": "Replace all relative paths with absolute paths for images and links",
	"default": false
	},
	"screenshot": {
	"type": "boolean",
	"description": "Include a screenshot of the top of the page that you are scraping.",
	"default": false
	},
	"fullPageScreenshot": {
	"type": "boolean",
	"description": "Include a full page screenshot of the page that you are scraping.",
	"default": false
	},
	"waitFor": {
	"type": "integer",
	"description": "Number of milliseconds to wait for the page to load before fetching its content",
	"default": 0
	}
	}
	},
	"extractorOptions": {
	"type": "object",
	"description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
	"default": {},
	"properties": {
	"mode": {
	"type": "string",
	"enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
	"description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
	},
	"extractionPrompt": {
	"type": "string",
	"description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
	},
	"extractionSchema": {
	"type": "object",
	"additionalProperties": true,
	"description": "The schema for the data to be extracted, required only for LLM extraction modes.",
	"example": {
	"type": "object",
	"properties": {
	"company_mission": {
	"type": "string"
	},
	"supports_sso": {
	"type": "boolean"
	},
	"is_open_source": {
	"type": "boolean"
	}
	},
	"required": [
	"company_mission",
	"supports_sso",
	"is_open_source"
	]
	}
	}
	}
	},
	"timeout": {
	"type": "integer",
	"description": "Timeout in milliseconds for the request",
	"default": 30000
	}
	},
	"required": ["url"]
	}
	}
	}
	},
	"responses": {
	"200": {
	"description": "Successful response",
	"content": {
	"application/json": {
	"schema": {
	"$ref": "#/components/schemas/ScrapeResponse"
	}
	}
	}
	},
	"402": {
	"description": "Payment required",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Payment required to access this resource."
	}
	}
	}
	}
	}
	},
	"429": {
	"description": "Too many requests",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Request rate limit exceeded. Please wait and try again later."
	}
	}
	}
	}
	}
	},
	"500": {
	"description": "Server error",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "An unexpected error occurred on the server."
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"/crawl": {
	"post": {
	"summary": "Crawl multiple URLs based on options",
	"operationId": "crawlUrls",
	"tags": ["Crawl"],
	"security": [
	{
	"bearerAuth": []
	}
	],
	"requestBody": {
	"required": true,
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"url": {
	"type": "string",
	"format": "uri",
	"description": "The base URL to start crawling from"
	},
	"crawlerOptions": {
	"type": "object",
	"properties": {
	"includes": {
	"type": "array",
	"items": {
	"type": "string"
	},
	"description": "URL patterns to include"
	},
	"excludes": {
	"type": "array",
	"items": {
	"type": "string"
	},
	"description": "URL patterns to exclude"
	},
	"generateImgAltText": {
	"type": "boolean",
	"description": "Generate alt text for images using LLMs (must have a paid plan)",
	"default": false
	},
	"returnOnlyUrls": {
	"type": "boolean",
	"description": "If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.",
	"default": false
	},
	"maxDepth": {
	"type": "integer",
	"description": "Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern."
	},
	"mode": {
	"type": "string",
	"enum": ["default", "fast"],
	"description": "The crawling mode to use. Fast mode crawls websites without a sitemap 4x faster, but may be less accurate and shouldn't be used on heavily JavaScript-rendered websites.",
	"default": "default"
	},
	"ignoreSitemap": {
	"type": "boolean",
	"description": "Ignore the website sitemap when crawling",
	"default": false
	},
	"limit": {
	"type": "integer",
	"description": "Maximum number of pages to crawl",
	"default": 10000
	},
	"allowBackwardCrawling": {
	"type": "boolean",
	"description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'",
	"default": false
	},
	"allowExternalContentLinks": {
	"type": "boolean",
	"description": "Allows the crawler to follow links to external websites.",
	"default": false
	}
	}
	},
	"pageOptions": {
	"type": "object",
	"properties": {
	"headers": {
	"type": "object",
	"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
	},
	"includeHtml": {
	"type": "boolean",
	"description": "Include the HTML version of the content on page. Will output a html key in the response.",
	"default": false
	},
	"includeRawHtml": {
	"type": "boolean",
	"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
	"default": false
	},
	"onlyIncludeTags": {
	"type": "array",
	"items": {
	"type": "string"
	},
	"description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
	},
	"onlyMainContent": {
	"type": "boolean",
	"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
	"default": false
	},
	"removeTags": {
	"type": "array",
	"items": {
	"type": "string"
	},
	"description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
	},
	"replaceAllPathsWithAbsolutePaths": {
	"type": "boolean",
	"description": "Replace all relative paths with absolute paths for images and links",
	"default": false
	},
	"screenshot": {
	"type": "boolean",
	"description": "Include a screenshot of the top of the page that you are scraping.",
	"default": false
	},
	"fullPageScreenshot": {
	"type": "boolean",
	"description": "Include a full page screenshot of the page that you are scraping.",
	"default": false
	},
	"waitFor": {
	"type": "integer",
	"description": "Number of milliseconds to wait for the page to load before fetching its content",
	"default": 0
	}
	}
	}
	},
	"required": ["url"]
	}
	}
	}
	},
	"responses": {
	"200": {
	"description": "Successful response",
	"content": {
	"application/json": {
	"schema": {
	"$ref": "#/components/schemas/CrawlResponse"
	}
	}
	}
	},
	"402": {
	"description": "Payment required",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Payment required to access this resource."
	}
	}
	}
	}
	}
	},
	"429": {
	"description": "Too many requests",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Request rate limit exceeded. Please wait and try again later."
	}
	}
	}
	}
	}
	},
	"500": {
	"description": "Server error",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "An unexpected error occurred on the server."
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"/search": {
	"post": {
	"summary": "Search for a keyword in Google, returns top page results with markdown content for each page",
	"operationId": "searchGoogle",
	"tags": ["Search"],
	"security": [
	{
	"bearerAuth": []
	}
	],
	"requestBody": {
	"required": true,
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"query": {
	"type": "string",
	"description": "The query to search for"
	},
	"pageOptions": {
	"type": "object",
	"properties": {
	"onlyMainContent": {
	"type": "boolean",
	"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
	"default": false
	},
	"fetchPageContent": {
	"type": "boolean",
	"description": "Fetch the content of each page. If false, defaults to a basic fast serp API.",
	"default": true
	},
	"includeHtml": {
	"type": "boolean",
	"description": "Include the HTML version of the content on page. Will output a html key in the response.",
	"default": false
	},
	"includeRawHtml": {
	"type": "boolean",
	"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
	"default": false
	}
	}
	},
	"searchOptions": {
	"type": "object",
	"properties": {
	"limit": {
	"type": "integer",
	"description": "Maximum number of results. Max is 20 during beta."
	}
	}
	}
	},
	"required": ["query"]
	}
	}
	}
	},
	"responses": {
	"200": {
	"description": "Successful response",
	"content": {
	"application/json": {
	"schema": {
	"$ref": "#/components/schemas/SearchResponse"
	}
	}
	}
	},
	"402": {
	"description": "Payment required",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Payment required to access this resource."
	}
	}
	}
	}
	}
	},
	"429": {
	"description": "Too many requests",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Request rate limit exceeded. Please wait and try again later."
	}
	}
	}
	}
	}
	},
	"500": {
	"description": "Server error",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "An unexpected error occurred on the server."
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"/crawl/status/{jobId}": {
	"get": {
	"tags": ["Crawl"],
	"summary": "Get the status of a crawl job",
	"operationId": "getCrawlStatus",
	"security": [
	{
	"bearerAuth": []
	}
	],
	"parameters": [
	{
	"name": "jobId",
	"in": "path",
	"description": "ID of the crawl job",
	"required": true,
	"schema": {
	"type": "string"
	}
	}
	],
	"responses": {
	"200": {
	"description": "Successful response",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"status": {
	"type": "string",
	"description": "Status of the job (completed, active, failed, paused)"
	},
	"current": {
	"type": "integer",
	"description": "Current page number"
	},
	"total": {
	"type": "integer",
	"description": "Total number of pages"
	},
	"data": {
	"type": "array",
	"nullable": true,
	"items": {
	"$ref": "#/components/schemas/CrawlStatusResponseObj"
	},
	"description": "Data returned from the job (null when it is in progress)"
	},
	"partial_data": {
	"type": "array",
	"items": {
	"$ref": "#/components/schemas/CrawlStatusResponseObj"
	},
	"description": "Partial documents returned as the site is being crawled (streaming). This feature is currently in alpha - expect breaking changes. When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data becomes empty and the result is available in `data`. There is a max of 50 items in the array response; the oldest item (top of the array) is removed when a new item is added beyond that limit."
	}
	}
	}
	}
	}
	},
	"402": {
	"description": "Payment required",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Payment required to access this resource."
	}
	}
	}
	}
	}
	},
	"429": {
	"description": "Too many requests",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Request rate limit exceeded. Please wait and try again later."
	}
	}
	}
	}
	}
	},
	"500": {
	"description": "Server error",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "An unexpected error occurred on the server."
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"/crawl/cancel/{jobId}": {
	"delete": {
	"tags": ["Crawl"],
	"summary": "Cancel a crawl job",
	"operationId": "cancelCrawlJob",
	"security": [
	{
	"bearerAuth": []
	}
	],
	"parameters": [
	{
	"name": "jobId",
	"in": "path",
	"description": "ID of the crawl job",
	"required": true,
	"schema": {
	"type": "string"
	}
	}
	],
	"responses": {
	"200": {
	"description": "Successful response",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"status": {
	"type": "string",
	"description": "Returns cancelled."
	}
	}
	}
	}
	}
	},
	"402": {
	"description": "Payment required",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Payment required to access this resource."
	}
	}
	}
	}
	}
	},
	"429": {
	"description": "Too many requests",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "Request rate limit exceeded. Please wait and try again later."
	}
	}
	}
	}
	}
	},
	"500": {
	"description": "Server error",
	"content": {
	"application/json": {
	"schema": {
	"type": "object",
	"properties": {
	"error": {
	"type": "string",
	"example": "An unexpected error occurred on the server."
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"components": {
	"securitySchemes": {
	"bearerAuth": {
	"type": "http",
	"scheme": "bearer"
	}
	},
	"schemas": {
	"ScrapeResponse": {
	"type": "object",
	"properties": {
	"success": {
	"type": "boolean"
	},
	"data": {
	"type": "object",
	"properties": {
	"markdown": {
	"type": "string"
	},
	"content": {
	"type": "string"
	},
	"html": {
	"type": "string",
	"nullable": true,
	"description": "HTML version of the content on page if `includeHtml` is true"
	},
	"rawHtml": {
	"type": "string",
	"nullable": true,
	"description": "Raw HTML content of the page if `includeRawHtml` is true"
	},
	"metadata": {
	"type": "object",
	"description": "Metadata of the scraped page. Besides the properties listed here, any other metadata may be included as additional keys.",
	"properties": {
	"title": {
	"type": "string"
	},
	"description": {
	"type": "string"
	},
	"language": {
	"type": "string",
	"nullable": true
	},
	"sourceURL": {
	"type": "string",
	"format": "uri"
	},
	"pageStatusCode": {
	"type": "integer",
	"description": "The status code of the page"
	},
	"pageError": {
	"type": "string",
	"nullable": true,
	"description": "The error message of the page"
	}
	},
	"additionalProperties": true
	},
	"llm_extraction": {
	"type": "object",
	"description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
	"nullable": true
	},
	"warning": {
	"type": "string",
	"nullable": true,
	"description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction."
	}
	}
	}
	}
	},
	"CrawlStatusResponseObj": {
	"type": "object",
	"properties": {
	"markdown": {
	"type": "string"
	},
	"content": {
	"type": "string"
	},
	"html": {
	"type": "string",
	"nullable": true,
	"description": "HTML version of the content on page if `includeHtml` is true"
	},
	"rawHtml": {
	"type": "string",
	"nullable": true,
	"description": "Raw HTML content of the page if `includeRawHtml` is true"
	},
	"index": {
	"type": "integer",
	"description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from."
	},
	"metadata": {
	"type": "object",
	"description": "Metadata of the crawled page. Besides the properties listed here, any other metadata may be included as additional keys.",
	"properties": {
	"title": {
	"type": "string"
	},
	"description": {
	"type": "string"
	},
	"language": {
	"type": "string",
	"nullable": true
	},
	"sourceURL": {
	"type": "string",
	"format": "uri"
	},
	"pageStatusCode": {
	"type": "integer",
	"description": "The status code of the page"
	},
	"pageError": {
	"type": "string",
	"nullable": true,
	"description": "The error message of the page"
	}
	},
	"additionalProperties": true
	}
	}
	},
	"SearchResponse": {
	"type": "object",
	"properties": {
	"success": {
	"type": "boolean"
	},
	"data": {
	"type": "array",
	"items": {
	"type": "object",
	"properties": {
	"url": {
	"type": "string"
	},
	"markdown": {
	"type": "string"
	},
	"content": {
	"type": "string"
	},
	"metadata": {
	"type": "object",
	"properties": {
	"title": {
	"type": "string"
	},
	"description": {
	"type": "string"
	},
	"language": {
	"type": "string",
	"nullable": true
	},
	"sourceURL": {
	"type": "string",
	"format": "uri"
	}
	}
	}
	}
	}
	}
	}
	},
	"CrawlResponse": {
	"type": "object",
	"properties": {
	"jobId": {
	"type": "string"
	}
	}
	}
	}
	},
	"security": [
	{
	"bearerAuth": []
	}
	]
	}