Spaces:
Sleeping
Sleeping
Update rag.py
Browse files
rag.py
CHANGED
|
@@ -9,6 +9,7 @@ from llama_index.core import (
|
|
| 9 |
)
|
| 10 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 11 |
from llama_index.core.llms import MockLLM
|
|
|
|
| 12 |
|
| 13 |
# Set cache directory for Hugging Face models
|
| 14 |
os.environ["HF_HOME"] = "/tmp/hf"
|
|
@@ -29,13 +30,17 @@ def initialize_index():
|
|
| 29 |
index = load_index_from_storage(storage_context)
|
| 30 |
else:
|
| 31 |
os.makedirs(DATA_DIR, exist_ok=True)
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
if os.listdir(DATA_DIR):
|
| 34 |
-
documents =
|
| 35 |
index = VectorStoreIndex.from_documents(documents)
|
| 36 |
index.storage_context.persist(persist_dir=PERSIST_DIR)
|
| 37 |
else:
|
| 38 |
-
# Create an empty index if no data is found
|
| 39 |
index = VectorStoreIndex.from_documents([])
|
| 40 |
return index
|
| 41 |
|
|
|
|
| 9 |
)
|
| 10 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 11 |
from llama_index.core.llms import MockLLM
|
| 12 |
+
from llama_index.readers.file import PyMuPDFReader
|
| 13 |
|
| 14 |
# Set cache directory for Hugging Face models
|
| 15 |
os.environ["HF_HOME"] = "/tmp/hf"
|
|
|
|
| 30 |
index = load_index_from_storage(storage_context)
|
| 31 |
else:
|
| 32 |
os.makedirs(DATA_DIR, exist_ok=True)
|
| 33 |
+
loader = SimpleDirectoryReader(
|
| 34 |
+
DATA_DIR,
|
| 35 |
+
recursive=True,
|
| 36 |
+
required_exts=[".pdf"],
|
| 37 |
+
file_extractor={".pdf": PyMuPDFReader()}
|
| 38 |
+
)
|
| 39 |
if os.listdir(DATA_DIR):
|
| 40 |
+
documents = loader.load_data()
|
| 41 |
index = VectorStoreIndex.from_documents(documents)
|
| 42 |
index.storage_context.persist(persist_dir=PERSIST_DIR)
|
| 43 |
else:
|
|
|
|
| 44 |
index = VectorStoreIndex.from_documents([])
|
| 45 |
return index
|
| 46 |
|