innofacisteven committed on
Commit
1a8deeb
·
verified ·
1 Parent(s): 96bcc26

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +8 -3
rag.py CHANGED
@@ -9,6 +9,7 @@ from llama_index.core import (
9
  )
10
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
11
  from llama_index.core.llms import MockLLM
 
12
 
13
  # Set cache directory for Hugging Face models
14
  os.environ["HF_HOME"] = "/tmp/hf"
@@ -29,13 +30,17 @@ def initialize_index():
29
  index = load_index_from_storage(storage_context)
30
  else:
31
  os.makedirs(DATA_DIR, exist_ok=True)
32
- # Load documents if folder is not empty
 
 
 
 
 
33
  if os.listdir(DATA_DIR):
34
- documents = SimpleDirectoryReader(DATA_DIR).load_data()
35
  index = VectorStoreIndex.from_documents(documents)
36
  index.storage_context.persist(persist_dir=PERSIST_DIR)
37
  else:
38
- # Create an empty index if no data is found
39
  index = VectorStoreIndex.from_documents([])
40
  return index
41
 
 
9
  )
10
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
11
  from llama_index.core.llms import MockLLM
12
+ from llama_index.readers.file import PyMuPDFReader
13
 
14
  # Set cache directory for Hugging Face models
15
  os.environ["HF_HOME"] = "/tmp/hf"
 
30
  index = load_index_from_storage(storage_context)
31
  else:
32
  os.makedirs(DATA_DIR, exist_ok=True)
33
+ loader = SimpleDirectoryReader(
34
+ DATA_DIR,
35
+ recursive=True,
36
+ required_exts=[".pdf"],
37
+ file_extractor={".pdf": PyMuPDFReader()}
38
+ )
39
  if os.listdir(DATA_DIR):
40
+ documents = loader.load_data()
41
  index = VectorStoreIndex.from_documents(documents)
42
  index.storage_context.persist(persist_dir=PERSIST_DIR)
43
  else:
 
44
  index = VectorStoreIndex.from_documents([])
45
  return index
46