# model_endpoints/customgraph.py
# (uploaded via Hugging Face web UI, commit c8440e8)
import nest_asyncio
nest_asyncio.apply()
from scrapegraphai.graphs import SmartScraperMultiGraph
from scrapegraphai.nodes import FetchNode, ParseNode
from langchain.schema import Document
# Create a custom graph class
class CustomSmartScraperMultiGraph(SmartScraperMultiGraph):
    """SmartScraperMultiGraph variant that bypasses the built-in pipeline.

    ``run()`` fetches and parses the first URL found in ``self.source``,
    then concatenates the page content of every ``Document`` source with
    the first parsed chunk of the fetched page, returning one plain string.
    """

    def run(self):
        """Fetch/parse the first URL source and merge it with Document sources.

        Returns:
            str: concatenation of every Document's ``page_content`` followed
            by the first parsed chunk of the fetched URL (empty contribution
            if ``self.source`` contains no URL).
        """
        # BUG FIX: the original referenced `parsed_doc` unconditionally at the
        # end, raising NameError whenever no source was an http(s) URL.
        # Initialize to None and only append when a URL was actually parsed.
        parsed_doc = None
        for source in self.source:
            if isinstance(source, str) and source.startswith("http"):
                fetch_node = FetchNode(
                    input="url | local_dir",
                    output=["doc", "link_urls", "img_urls"],
                    node_config={
                        "verbose": True,
                        "headless": True,
                    },
                )
                url_data = fetch_node.execute({"url": source})
                parse_node = ParseNode(
                    input="doc",
                    output=["parsed_doc"],
                    node_config={
                        "chunk_size": 4096,
                        "verbose": True,
                    },
                )
                parsed_doc = parse_node.execute({"doc": url_data["doc"]})
                break  # only the first URL source is fetched

        # Combine Document contents (in source order) with the parsed URL text.
        parts = [
            source.page_content
            for source in self.source
            if isinstance(source, Document)
        ]
        if parsed_doc is not None:
            # ParseNode chunks the page; only the first chunk is used,
            # matching the original behavior.
            parts.append(parsed_doc["parsed_doc"][0])
        return "".join(parts)
def get_data(pdf_doc, web_url, openai_key, *,
             model="gpt-4o",
             prompt="give an indepth analysis"):
    """Run the custom scraper graph over a local document plus a web page.

    Args:
        pdf_doc: Raw text content (e.g. extracted from a PDF) wrapped into a
            ``Document`` source.
        web_url: URL to fetch and parse alongside the local content.
        openai_key: OpenAI API key passed to the graph's LLM config.
        model: LLM model name (keyword-only; defaults to the original
            hard-coded "gpt-4o" for backward compatibility).
        prompt: Instruction prompt for the graph (keyword-only; defaults to
            the original hard-coded prompt).

    Returns:
        str: combined text produced by ``CustomSmartScraperMultiGraph.run``.
    """
    graph_config = {
        "llm": {
            "api_key": openai_key,
            "model": model,
        },
        "verbose": True,
    }
    sources = [
        web_url,
        Document(page_content=pdf_doc, metadata={"source": "local_content"}),
    ]
    # Instantiate the custom graph and run it.
    multiple_search_graph = CustomSmartScraperMultiGraph(
        prompt=prompt,
        source=sources,
        config=graph_config,
    )
    return multiple_search_graph.run()