|
|
from langflow.custom import Component |
|
|
from langflow.io import ( |
|
|
DataInput, |
|
|
IntInput, |
|
|
Output, |
|
|
SecretStrInput, |
|
|
StrInput, |
|
|
) |
|
|
from langflow.schema import Data |
|
|
|
|
|
|
|
|
class FirecrawlScrapeApi(Component):
    """Langflow component that scrapes a single URL through the Firecrawl API.

    The component gathers the optional ``scrapeOptions`` and
    ``extractorOptions`` payloads into one parameter dict, hands it to
    ``FirecrawlApp.scrape_url`` and wraps whatever that call returns in a
    ``Data`` object.
    """

    display_name: str = "FirecrawlScrapeApi"
    description: str = "Firecrawl Scrape API."
    name = "FirecrawlScrapeApi"

    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="API Key",
            required=True,
            password=True,
            info="The API key to use Firecrawl API.",
        ),
        StrInput(
            name="url",
            display_name="URL",
            required=True,
            info="The URL to scrape.",
        ),
        IntInput(
            name="timeout",
            display_name="Timeout",
            info="Timeout in milliseconds for the request.",
        ),
        DataInput(
            name="scrapeOptions",
            display_name="Scrape Options",
            info="The page options to send with the request.",
        ),
        DataInput(
            name="extractorOptions",
            display_name="Extractor Options",
            info="The extractor options to send with the request.",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="crawl"),
    ]

    @staticmethod
    def _options_as_dict(options) -> dict:
        """Return a shallow copy of the ``data`` payload of an optional input.

        An unset (falsy) input yields an empty dict. Copying keeps ``crawl``
        from mutating the dict owned by the object that was connected to the
        input.
        """
        if not options:
            return {}
        return dict(options.__dict__["data"] or {})

    def crawl(self) -> Data:
        """Scrape ``self.url`` with Firecrawl and return the result as ``Data``.

        Returns:
            A ``Data`` object whose ``data`` is the value returned by
            ``FirecrawlApp.scrape_url``.

        Raises:
            ImportError: If the ``firecrawl-py`` package is not installed.
        """
        # Imported lazily so the component can be loaded without firecrawl-py.
        try:
            from firecrawl.firecrawl import FirecrawlApp
        except ImportError as e:
            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
            raise ImportError(msg) from e

        params = self._options_as_dict(self.scrapeOptions)
        extractor_options = self._options_as_dict(self.extractorOptions)
        if extractor_options:
            params["extract"] = extractor_options

        # Forward the declared "timeout" input, which was previously ignored.
        # An explicit "timeout" inside the scrape options takes precedence.
        # NOTE(review): assumes the scrape endpoint accepts a top-level
        # `timeout` in milliseconds, as described at the `documentation` URL;
        # confirm against the installed firecrawl-py version.
        timeout = getattr(self, "timeout", None)
        if isinstance(timeout, int) and not isinstance(timeout, bool) and timeout > 0:
            params.setdefault("timeout", timeout)

        app = FirecrawlApp(api_key=self.api_key)
        results = app.scrape_url(self.url, params=params)
        return Data(data=results)
|
|
|