File size: 930 Bytes
2b11763
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import requests
from bs4 import BeautifulSoup

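# Disabled alternative: fetch the page with requests and extract its visible
# text with BeautifulSoup, returning it as a one-element list.
# def load_from_website(url):
#     response = requests.get(url)
#     soup = BeautifulSoup(response.content, 'html.parser')
#     text = soup.get_text(separator="\n")
#     return [text]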

# print(load_from_website("https://thehexatech.com"))
# print()
# print(load_from_website("https://thehexatech.com/about/index.html"))
# print()
# print(load_from_website("https://thehexatech.com/quote/index.html"))
# print()


import asyncio
from langchain_unstructured import UnstructuredLoader

# Module-level accumulator that get_data() fills and main() reads.
docs: list = []

# Page to ingest, and the loader configured to read it by URL.
page_url = "https://thehexatech.com/about"
loader = UnstructuredLoader(web_url=page_url)

async def get_data():
    """Append every document yielded by ``loader.alazy_load()`` to ``docs``.

    The module-level ``docs`` list is mutated in place and never cleared
    here, so calling this coroutine again appends to what is already there.
    """
    # No ``global`` declaration needed: the list is mutated, not rebound.
    async for loaded in loader.alazy_load():
        docs.append(loaded)

async def main():
    """Fill ``docs`` via ``get_data()``, then print each entry's page content."""
    await get_data()
    for entry in docs:
        text = entry.page_content
        print(text)

# Start the load-and-print run only when this file is executed as a script,
# so that importing the module does not kick it off as a side effect.
if __name__ == "__main__":
    asyncio.run(main())