IsmaeelPandey committed on
Commit
dc3eee7
·
1 Parent(s): 3e7232c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -9
app.py CHANGED
@@ -14,30 +14,26 @@ from langchain.memory import ConversationBufferMemory
14
  from langchain.prompts import PromptTemplate
15
 
16
  from langchain.text_splitter import RecursiveCharacterTextSplitter
17
- from langchain.document_loaders import RecursiveUrlLoader
18
  from langchain.chains import RetrievalQA
19
 
20
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"
21
  embedding = HuggingFaceHubEmbeddings()
22
 
23
  url = "https://www.whiteslide.co.za/team/"
24
- loader = RecursiveUrlLoader(url=url, max_depth=1)
 
25
  loaders = []
26
 
 
27
  loaders.append(loader)
28
 
29
  docs = []
30
  for l in loaders:
31
  docs.extend(l.load())
32
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=200)
33
  docs = text_splitter.split_documents(docs)
34
 
35
-
36
-
37
-
38
- # text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
39
- # all_splits = text_splitter.split_documents(data)
40
-
41
  vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)
42
 
43
 
 
14
  from langchain.prompts import PromptTemplate
15
 
16
  from langchain.text_splitter import RecursiveCharacterTextSplitter
17
+ from langchain.document_loaders import RecursiveUrlLoader, WebBaseLoader
18
  from langchain.chains import RetrievalQA
19
 
20
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"
21
  embedding = HuggingFaceHubEmbeddings()
22
 
23
  url = "https://www.whiteslide.co.za/team/"
24
+ # loader = RecursiveUrlLoader(url=url, max_depth=1)
25
+ loader = WebBaseLoader(url)
26
  loaders = []
27
 
28
+
29
  loaders.append(loader)
30
 
31
  docs = []
32
  for l in loaders:
33
  docs.extend(l.load())
34
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
35
  docs = text_splitter.split_documents(docs)
36
 
 
 
 
 
 
 
37
  vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)
38
 
39