Spaces:
Sleeping
Sleeping
| from utils.settings import configure_settings | |
| from utils.constant import * | |
| from llama_parse import LlamaParse | |
| from llama_index.core import Settings | |
| from llama_index.core import VectorStoreIndex | |
| from llama_index.core import SimpleDirectoryReader | |
| from llama_index.core import StorageContext, load_index_from_storage | |
def get_documents(path: str):
    """Load the files found in directory *path* as documents.

    Files ending in ``.pdf`` are mapped to a ``LlamaParse`` instance through
    the reader's ``file_extractor`` argument.

    Args:
        path: Directory handed to ``SimpleDirectoryReader`` as ``input_dir``.

    Returns:
        Whatever ``SimpleDirectoryReader.load_data()`` produces for *path*.
    """
    print("Getting documents...")
    reader = SimpleDirectoryReader(
        input_dir=path,
        file_extractor={".pdf": LlamaParse()},
    )
    return reader.load_data()
def create_index(doc_path: str, index_path: str):
    """Build a vector index over the files in *doc_path* and persist it.

    Steps, in order: apply the project settings, load the documents, split
    them into nodes with ``Settings.node_parser``, build a
    ``VectorStoreIndex`` from those nodes, and write the index's storage
    context to *index_path*.

    Args:
        doc_path: Directory containing the source documents.
        index_path: Directory passed to ``persist`` as ``persist_dir``.

    Returns:
        The newly built ``VectorStoreIndex``.
    """
    print("Indexing documents...")
    configure_settings()

    # Load first, then touch Settings.node_parser, mirroring the settings ->
    # documents -> parser sequence this function has always used.
    loaded_docs = get_documents(doc_path)
    chunks = Settings.node_parser.get_nodes_from_documents(loaded_docs)

    index = VectorStoreIndex(chunks, show_progress=True)
    index.storage_context.persist(persist_dir=index_path)
    return index
def load_index(path: str):
    """Load an index from the persisted storage in directory *path*.

    Args:
        path: Directory passed to ``StorageContext.from_defaults`` as
            ``persist_dir``.

    Returns:
        The object returned by ``load_index_from_storage``.
    """
    print("Loading index...")
    return load_index_from_storage(
        StorageContext.from_defaults(persist_dir=path)
    )
if __name__ == "__main__":
    # Script entry point: build and persist the index.
    # DATA_PATH and INDEX_PATH are not defined in this file; presumably they
    # come from the `utils.constant` star import -- verify there.
    create_index(doc_path=DATA_PATH, index_path=INDEX_PATH)