import nest_asyncio

# Patch the running event loop so scrapegraphai's async internals work
# inside environments that already have a loop (e.g. Jupyter).
nest_asyncio.apply()

from scrapegraphai.graphs import SmartScraperMultiGraph
from scrapegraphai.nodes import FetchNode, ParseNode
from langchain.schema import Document


class CustomSmartScraperMultiGraph(SmartScraperMultiGraph):
    """SmartScraperMultiGraph variant that manually fetches and parses the
    first http(s) URL in ``self.source`` and concatenates the parsed text
    with the ``page_content`` of any in-memory Document sources."""

    def run(self):
        """Fetch/parse the first URL source and combine it with Document sources.

        Returns:
            str: the concatenated ``page_content`` of every Document in
                ``self.source``, followed by the first parsed chunk of the
                fetched URL. Either part is omitted when that kind of source
                is absent (previously a missing URL raised NameError because
                ``parsed_doc`` was never bound).
        """
        # None until a URL source has been fetched and parsed; guards the
        # final concatenation when no http(s) source is present.
        parsed_doc = None
        for source in self.source:
            if isinstance(source, str) and source.startswith("http"):
                fetch_node = FetchNode(
                    input="url | local_dir",
                    output=["doc", "link_urls", "img_urls"],
                    node_config={
                        "verbose": True,
                        "headless": True,
                    },
                )
                url_data = fetch_node.execute({"url": source})

                parse_node = ParseNode(
                    input="doc",
                    output=["parsed_doc"],
                    node_config={
                        "chunk_size": 4096,
                        "verbose": True,
                    },
                )
                parsed_doc = parse_node.execute({"doc": url_data["doc"]})
                break  # only the first URL source is fetched

        # Collect the text of every in-memory Document source.
        parts = [
            source.page_content
            for source in self.source
            if isinstance(source, Document)
        ]
        # Append the first parsed chunk of the fetched page, if any.
        # NOTE(review): assumes ParseNode's "parsed_doc" chunks are strings —
        # confirm against the scrapegraphai version in use.
        if parsed_doc is not None and parsed_doc.get("parsed_doc"):
            parts.append(parsed_doc["parsed_doc"][0])
        return "".join(parts)


def get_data(pdf_doc, web_url, openai_key):
    """Run the custom multi-source scraper over a web URL plus local text.

    Args:
        pdf_doc: raw text content (e.g. extracted from a PDF) wrapped into a
            langchain ``Document`` as a local source.
        web_url: URL whose page content should be fetched and parsed.
        openai_key: OpenAI API key placed in the graph's LLM config.

    Returns:
        str: the combined text produced by
            ``CustomSmartScraperMultiGraph.run()``.
    """
    graph_config = {
        "llm": {
            "api_key": openai_key,
            "model": "gpt-4o",
        },
        "verbose": True,
    }
    sources = [
        web_url,
        Document(page_content=pdf_doc, metadata={"source": "local_content"}),
    ]
    prompt = "give an indepth analysis"

    # Instantiate the custom graph and run it over both sources.
    multiple_search_graph = CustomSmartScraperMultiGraph(
        prompt=prompt,
        source=sources,
        config=graph_config,
    )
    return multiple_search_graph.run()