Spaces:
Running
Running
File size: 930 Bytes
2b11763 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import requests
from bs4 import BeautifulSoup
# def load_from_website(url):
# response = requests.get(url)
# soup = BeautifulSoup(response.content, 'html.parser')
# text = soup.get_text(separator="\n")
# return [text]
# print(load_from_website("https://thehexatech.com"))
# print()
# print(load_from_website("https://thehexatech.com/about/index.html"))
# print()
# print(load_from_website("https://thehexatech.com/quote/index.html"))
# print()
import asyncio
from langchain_unstructured import UnstructuredLoader
# URL of the page handed to the loader below.
page_url: str = "https://thehexatech.com/about"
# Loader configured with the page URL via ``web_url``; get_data() iterates it
# with ``alazy_load()``. NOTE(review): presumably this fetches and partitions
# the page through Unstructured -- confirm against the langchain_unstructured
# documentation.
loader = UnstructuredLoader(web_url=page_url)
# Module-level accumulator: get_data() appends each loaded document here and
# main() reads from it afterwards.
docs: list = []
async def get_data():
    """Load every document from the module-level ``loader`` into ``docs``.

    Asynchronously iterates ``loader.alazy_load()`` and appends each yielded
    document to the module-level ``docs`` list, in the order produced.

    Returns:
        None. Callers read the results from ``docs``.
    """
    # No ``global`` declaration is required: ``docs`` is only mutated in
    # place (``append``) and never rebound, so ordinary name lookup already
    # resolves to the module-level list.
    async for doc in loader.alazy_load():
        docs.append(doc)
async def main():
    """Populate ``docs`` via ``get_data()`` and print each document's text.

    Awaits ``get_data()`` first, then writes the ``page_content`` of every
    entry in the module-level ``docs`` list to stdout, one ``print`` call
    per document.
    """
    await get_data()
    # Debugging aid: uncomment to dump the raw document objects instead.
    # print(docs)
    for document in docs:
        print(document.page_content)
# Script entry point: run the main() coroutine to completion.
asyncio.run(main())
|