Spaces:
Running
Running
| from langflow.custom import Component | |
| from langflow.io import ( | |
| DataInput, | |
| IntInput, | |
| Output, | |
| SecretStrInput, | |
| StrInput, | |
| ) | |
| from langflow.schema import Data | |
class FirecrawlScrapeApi(Component):
    """Langflow component that scrapes a single URL through the Firecrawl API.

    The component collects an API key, a target URL and optional scrape /
    extractor options, calls ``FirecrawlApp.scrape_url`` and wraps whatever
    that call returns in a ``Data`` object.
    """

    display_name: str = "FirecrawlScrapeApi"
    description: str = "Firecrawl Scrape API."
    name = "FirecrawlScrapeApi"

    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="API Key",
            required=True,
            password=True,
            info="The API key to use Firecrawl API.",
        ),
        StrInput(
            name="url",
            display_name="URL",
            required=True,
            info="The URL to scrape.",
        ),
        IntInput(
            name="timeout",
            display_name="Timeout",
            info="Timeout in milliseconds for the request.",
        ),
        DataInput(
            name="scrapeOptions",
            display_name="Scrape Options",
            info="The page options to send with the request.",
        ),
        DataInput(  # https://docs.firecrawl.dev/features/extract
            name="extractorOptions",
            display_name="Extractor Options",
            info="The extractor options to send with the request.",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="crawl"),
    ]

    def crawl(self) -> Data:
        """Scrape ``self.url`` with Firecrawl and return the result.

        Returns:
            A single ``Data`` object whose ``data`` is the value returned by
            ``FirecrawlApp.scrape_url``.

        Raises:
            ImportError: If the ``firecrawl`` package cannot be imported.
        """
        try:
            from firecrawl.firecrawl import FirecrawlApp
        except ImportError as e:
            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
            raise ImportError(msg) from e

        # Work on a copy: the options dict belongs to the upstream Data object,
        # and adding keys to it in place would leak "extract"/"timeout" into
        # whatever else consumes that object.
        params = dict(self.scrapeOptions.data or {}) if self.scrapeOptions else {}

        extractor_options = dict(self.extractorOptions.data or {}) if self.extractorOptions else {}
        if extractor_options:
            params["extract"] = extractor_options

        # Forward the declared timeout input, which was previously ignored.
        # An explicit "timeout" inside the scrape options takes precedence.
        # NOTE(review): assumes the scrape endpoint accepts a top-level
        # "timeout" in milliseconds, as the input's info text says -- confirm
        # against the installed firecrawl-py version.
        if self.timeout:
            params.setdefault("timeout", self.timeout)

        app = FirecrawlApp(api_key=self.api_key)
        results = app.scrape_url(self.url, params=params)
        return Data(data=results)