BBQlover committed on
Commit
aa02814
·
verified ·
1 Parent(s): 5d889bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -19,10 +19,14 @@ if not os.path.exists("knowledge_base"):
19
  exit()
20
 
21
  # Load all PDFs from a local folder
22
- # loader = DirectoryLoader("knowledge_base/", glob="**/*.pdf", loader_cls=PyPDFLoader)
23
- # raw_documents = loader.load()
24
  from datasets import load_dataset
25
- dataset = load_dataset("BBQlover/DDaT_with_RAG")
 
 
 
 
 
 
26
 
27
  # Optional: split documents into smaller chunks for better retrieval
28
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 
19
  exit()
20
 
21
  # Load all PDFs from a local folder
 
 
22
  from datasets import load_dataset
23
+ from langchain.docstore.document import Document
24
+
25
+ # Load a dataset hosted on Hugging Face
26
+ dataset = load_dataset("BBQlover/DDaT_with_RAG", split="train")
27
+
28
+ # Convert each entry to LangChain-compatible document
29
+ raw_documents = [Document(page_content=entry["text"]) for entry in dataset]
30
 
31
  # Optional: split documents into smaller chunks for better retrieval
32
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)