Spaces:
Running
Running
| import os | |
| import logging | |
| import shutil | |
| from typing import Optional | |
| from rag_components import KnowledgeRAG | |
| from utils import download_and_unzip_gdrive_folder | |
| from config import ( | |
| GDRIVE_SOURCES_ENABLED, GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR, | |
| RAG_STORAGE_PARENT_DIR, RAG_FAISS_INDEX_SUBDIR_NAME, RAG_LOAD_INDEX_ON_STARTUP, | |
| RAG_EMBEDDING_MODEL_NAME, RAG_EMBEDDING_USE_GPU, | |
| RAG_CHUNK_SIZE, RAG_CHUNK_OVERLAP, | |
| RAG_RERANKER_MODEL_NAME, RAG_RERANKER_ENABLED | |
| ) | |
| logger = logging.getLogger(__name__) | |
def initialize_and_get_rag_system(force_rebuild: bool = False, source_dir_override: Optional[str] = None, storage_dir_override: Optional[str] = None) -> Optional[KnowledgeRAG]:
    """Create a KnowledgeRAG instance and load or build its index.

    Steps, in order:

    1. If GDrive sources are enabled, a folder id/URL is configured and no
       ``source_dir_override`` was given, ``RAG_SOURCES_DIR`` is removed and
       re-populated via ``download_and_unzip_gdrive_folder``.
    2. If ``force_rebuild`` is set, the existing index directory
       (``<storage dir>/RAG_FAISS_INDEX_SUBDIR_NAME``) is deleted.
    3. A ``KnowledgeRAG`` is constructed from the configured settings.
    4. Unless a rebuild is forced, ``RAG_LOAD_INDEX_ON_STARTUP`` is off, or
       ``rag.chunk_config_has_changed()`` reports a change, the index is
       loaded from disk. A failed load is logged and falls through to a build.
    5. If nothing was loaded, the index is built from the source directory,
       provided that directory exists and is not empty; otherwise the
       instance is returned without an index.

    Args:
        force_rebuild: Delete any existing index directory and skip loading.
        source_dir_override: Source directory to use instead of
            ``RAG_SOURCES_DIR``. Supplying it also disables the GDrive refresh.
        storage_dir_override: Storage parent directory to use instead of
            ``RAG_STORAGE_PARENT_DIR``.

    Returns:
        The initialized ``KnowledgeRAG`` instance, or ``None`` if any step
        raised. The failure is logged at CRITICAL level with a traceback.
    """
    logger.info("[RAG_SYSTEM_INIT] Initializing...")
    source_dir_to_use = source_dir_override or RAG_SOURCES_DIR
    storage_dir_to_use = storage_dir_override or RAG_STORAGE_PARENT_DIR

    # Every step that touches the filesystem or network lives inside this
    # try block, so a failed download or a failed index deletion is reported
    # the same way as any other initialization failure (logged, returns None)
    # instead of escaping to the caller as an uncaught exception.
    try:
        # GDrive Logic
        if GDRIVE_SOURCES_ENABLED and not source_dir_override and GDRIVE_FOLDER_ID_OR_URL:
            logger.info("[RAG_SYSTEM_INIT] Downloading sources from GDrive...")
            # Start from a clean directory so stale local files are not mixed
            # with the freshly downloaded ones.
            if os.path.exists(RAG_SOURCES_DIR):
                shutil.rmtree(RAG_SOURCES_DIR)
            download_and_unzip_gdrive_folder(GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR)

        faiss_index_path = os.path.join(storage_dir_to_use, RAG_FAISS_INDEX_SUBDIR_NAME)
        if force_rebuild and os.path.exists(faiss_index_path):
            logger.info("[RAG_SYSTEM_INIT] Force rebuild: deleting old index.")
            shutil.rmtree(faiss_index_path)

        rag = KnowledgeRAG(
            index_storage_dir=storage_dir_to_use,
            embedding_model_name=RAG_EMBEDDING_MODEL_NAME,
            use_gpu_for_embeddings=RAG_EMBEDDING_USE_GPU,
            chunk_size=RAG_CHUNK_SIZE,
            chunk_overlap=RAG_CHUNK_OVERLAP,
            reranker_model_name=RAG_RERANKER_MODEL_NAME,
            enable_reranker=RAG_RERANKER_ENABLED,
        )

        loaded = False
        if RAG_LOAD_INDEX_ON_STARTUP and not force_rebuild:
            if rag.chunk_config_has_changed():
                # Skip loading so the build step below runs.
                logger.warning("[RAG_SYSTEM_INIT] Chunk config changed — forcing index rebuild.")
            else:
                try:
                    rag.load_index_from_disk()
                    loaded = True
                except Exception as e:
                    # Deliberate best-effort: an unreadable or missing index
                    # is not fatal, we fall back to building a new one.
                    logger.warning(f"[RAG_SYSTEM_INIT] Load failed ({e}). Building new.")

        if not loaded:
            if not os.path.exists(source_dir_to_use) or not os.listdir(source_dir_to_use):
                logger.warning("[RAG_SYSTEM_INIT] No sources found. System empty.")
            else:
                rag.build_index_from_source_files(source_dir_to_use)

        logger.info("[RAG_SYSTEM_INIT] Complete.")
        return rag
    except Exception as e:
        logger.critical(f"[RAG_SYSTEM_INIT] FATAL: {e}", exc_info=True)
        return None