Spaces:
Running
Running
| import requests | |
| from bs4 import BeautifulSoup | |
| # def load_from_website(url): | |
| # response = requests.get(url) | |
| # soup = BeautifulSoup(response.content, 'html.parser') | |
| # text = soup.get_text(separator="\n") | |
| # return [text] | |
| # print(load_from_website("https://thehexatech.com")) | |
| # print() | |
| # print(load_from_website("https://thehexatech.com/about/index.html")) | |
| # print() | |
| # print(load_from_website("https://thehexatech.com/quote/index.html")) | |
| # print() | |
| import asyncio | |
| from langchain_unstructured import UnstructuredLoader | |
# Address of the page whose content this script extracts.
page_url = "https://thehexatech.com/about"

# Loader bound to that address; get_data() iterates it asynchronously.
loader = UnstructuredLoader(web_url=page_url)

# Shared accumulator: get_data() appends to it and main() reads it back.
docs = []
async def get_data():
    """Append every document produced by the module-level ``loader`` to ``docs``.

    Documents are consumed from ``loader.alazy_load()`` with ``async for``
    and added to the shared ``docs`` list one at a time, in the order they
    are yielded. Nothing is returned; callers read the results from ``docs``.
    """
    # ``docs`` is mutated in place and never rebound here, so the name
    # resolves to the module-level list without a ``global`` declaration.
    document_stream = loader.alazy_load()
    async for document in document_stream:
        docs.append(document)
async def main():
    """Fill ``docs`` via ``get_data()`` and print each document's text.

    After ``get_data()`` completes, the ``page_content`` attribute of every
    entry in the module-level ``docs`` list is printed on its own line, in
    list order. Nothing is printed when ``docs`` is empty.
    """
    await get_data()

    # Lazy generator: each page_content is read immediately before it is
    # printed, exactly as a plain loop over ``docs`` would do.
    page_texts = (entry.page_content for entry in docs)
    for text in page_texts:
        print(text)
# Script entry point: run the main() coroutine to completion on a fresh
# event loop managed by asyncio.run().
asyncio.run(main())