# Spaces: Sleeping — Hugging Face Space status header left over from the page scrape.
# Allow nested asyncio event loops: scrapegraphai's fetch nodes start their
# own loop, which fails inside environments (Jupyter/Streamlit) that already
# run one unless nest_asyncio patches the running loop first.
import nest_asyncio

nest_asyncio.apply()

from langchain.schema import Document
from scrapegraphai.graphs import SmartScraperMultiGraph
from scrapegraphai.nodes import FetchNode, ParseNode
# Custom graph that bypasses the stock multi-source pipeline: it fetches and
# parses the first URL source itself, then concatenates the result with the
# page content of any locally supplied Document sources.
class CustomSmartScraperMultiGraph(SmartScraperMultiGraph):
    """SmartScraperMultiGraph variant whose ``run`` combines web and local text.

    ``run`` fetches the first source that looks like a URL, splits it into
    chunks with ``ParseNode``, and returns the concatenation of every
    ``Document`` source's ``page_content`` followed by the first parsed chunk
    of the fetched page.
    """

    def run(self):
        """Fetch/parse the first URL source and merge it with Document sources.

        Returns:
            str: combined text. Sources of a kind that is absent simply
            contribute nothing (empty string).
        """
        # Fetch and parse the first URL-like source, if any.
        parsed_doc = None  # stays None when no URL source is present
        for source in self.source:
            if isinstance(source, str) and source.startswith("http"):
                fetch_node = FetchNode(
                    input="url | local_dir",
                    output=["doc", "link_urls", "img_urls"],
                    node_config={
                        "verbose": True,
                        "headless": True,
                    },
                )
                url_data = fetch_node.execute({"url": source})
                parse_node = ParseNode(
                    input="doc",
                    output=["parsed_doc"],
                    node_config={
                        "chunk_size": 4096,
                        "verbose": True,
                    },
                )
                parsed_doc = parse_node.execute({"doc": url_data["doc"]})
                break  # only the first URL source is fetched
        # Combine Document page content with the parsed URL content.
        combined_data = ""
        for source in self.source:
            if isinstance(source, Document):
                combined_data += source.page_content
        # BUG FIX: the original read ``parsed_doc`` unconditionally, raising
        # NameError whenever the sources contained no URL.
        if parsed_doc is not None:
            combined_data += parsed_doc["parsed_doc"][0]
        return combined_data
def get_data(pdf_doc, web_url, openai_key):
    """Run the custom scraper graph over a web page plus local PDF text.

    Args:
        pdf_doc: extracted text of the local PDF, wrapped as a Document.
        web_url: URL to fetch and parse alongside the PDF text.
        openai_key: OpenAI API key placed in the graph's LLM config.

    Returns:
        The combined text produced by ``CustomSmartScraperMultiGraph.run``.
    """
    llm_settings = {
        "llm": {
            "api_key": openai_key,
            "model": "gpt-4o",
        },
        "verbose": True,
    }
    scrape_sources = [
        web_url,
        Document(page_content=pdf_doc, metadata={"source": "local_content"}),
    ]
    # Build and execute the custom graph over both sources.
    analysis_graph = CustomSmartScraperMultiGraph(
        prompt="give an indepth analysis",
        source=scrape_sources,
        config=llm_settings,
    )
    return analysis_graph.run()