"""Load a web page through ``UnstructuredLoader`` and print its text content."""

import asyncio

import requests
from bs4 import BeautifulSoup
from langchain_unstructured import UnstructuredLoader

# Commented-out requests/BeautifulSoup variant (currently disabled); the
# `requests` and `bs4` imports above are only referenced from here.
#
# def load_from_website(url):
#     response = requests.get(url)
#     soup = BeautifulSoup(response.content, 'html.parser')
#     text = soup.get_text(separator="\n")
#     return [text]
#
# print(load_from_website("https://thehexatech.com"))
# print()
# print(load_from_website("https://thehexatech.com/about/index.html"))
# print()
# print(load_from_website("https://thehexatech.com/quote/index.html"))
# print()

# Page handed to the loader.
page_url = "https://thehexatech.com/about"
loader = UnstructuredLoader(web_url=page_url)

# Module-level accumulator filled by get_data(). It is never cleared, so
# calling get_data() more than once appends the loaded documents again.
docs = []


async def get_data():
    """Append every document produced by ``loader.alazy_load()`` to ``docs``.

    Returns:
        The module-level ``docs`` list (the same object, not a copy), so
        callers can use the result directly instead of reading the global.
    """
    # No `global` statement is needed: the list is mutated in place and the
    # name `docs` is never rebound.
    async for doc in loader.alazy_load():
        docs.append(doc)
    return docs


async def main():
    """Load the documents, then print each one's ``page_content``."""
    await get_data()
    # print(docs)
    for doc in docs:
        print(doc.page_content)


# Run only when executed as a script, so importing this module does not
# trigger the load and the printing as a side effect.
if __name__ == "__main__":
    asyncio.run(main())