| import os
|
| import logging
|
| import shutil
|
| from typing import Optional
|
|
|
| from rag_components import KnowledgeRAG
|
| from utils import download_and_unzip_gdrive_folder
|
| from config import (
|
| GDRIVE_SOURCES_ENABLED, GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR,
|
| RAG_STORAGE_PARENT_DIR, RAG_FAISS_INDEX_SUBDIR_NAME, RAG_LOAD_INDEX_ON_STARTUP,
|
| RAG_EMBEDDING_MODEL_NAME, RAG_EMBEDDING_USE_GPU,
|
| RAG_CHUNK_SIZE, RAG_CHUNK_OVERLAP,
|
| RAG_RERANKER_MODEL_NAME, RAG_RERANKER_ENABLED
|
| )
|
|
|
| logger = logging.getLogger(__name__)  # module-level logger, named after this module via __name__
|
|
|
def initialize_and_get_rag_system(
    force_rebuild: bool = False,
    source_dir_override: Optional[str] = None,
    storage_dir_override: Optional[str] = None,
) -> Optional[KnowledgeRAG]:
    """Create a KnowledgeRAG instance and load or build its index.

    Steps, in order:

    1. Resolve the source and storage directories; a truthy override wins
       over the configured ``RAG_SOURCES_DIR`` / ``RAG_STORAGE_PARENT_DIR``.
    2. When GDrive sources are enabled, a folder id/URL is configured and no
       source override was given, delete ``RAG_SOURCES_DIR`` (if present)
       and download the GDrive folder into it.
    3. When ``force_rebuild`` is set, delete the existing index directory.
    4. Construct ``KnowledgeRAG``. If loading on startup is enabled, no
       rebuild was forced and the chunk configuration has not changed, try
       to load the index from disk. Otherwise (or if loading fails) build
       the index from the source directory, provided it exists and is not
       empty.

    Args:
        force_rebuild: Delete any existing index directory and skip loading
            from disk.
        source_dir_override: Source directory to use instead of
            ``RAG_SOURCES_DIR``. Supplying it also disables the GDrive
            download.
        storage_dir_override: Storage directory to use instead of
            ``RAG_STORAGE_PARENT_DIR``.

    Returns:
        The ``KnowledgeRAG`` instance (returned without loading or building
        an index when no source files were found), or ``None`` if any step
        raised an exception.
    """
    logger.info("[RAG_SYSTEM_INIT] Initializing...")
    source_dir_to_use = source_dir_override or RAG_SOURCES_DIR
    storage_dir_to_use = storage_dir_override or RAG_STORAGE_PARENT_DIR

    try:
        # The filesystem preparation (source refresh, index deletion) is kept
        # inside the try so that a failed download or delete is logged and
        # reported as None like every other initialization failure, instead
        # of escaping to the caller as a raw exception.
        if GDRIVE_SOURCES_ENABLED and not source_dir_override and GDRIVE_FOLDER_ID_OR_URL:
            logger.info("[RAG_SYSTEM_INIT] Downloading sources from GDrive...")
            # Start from a clean directory so stale files do not linger.
            if os.path.exists(RAG_SOURCES_DIR):
                shutil.rmtree(RAG_SOURCES_DIR)
            download_and_unzip_gdrive_folder(GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR)

        faiss_index_path = os.path.join(storage_dir_to_use, RAG_FAISS_INDEX_SUBDIR_NAME)
        if force_rebuild and os.path.exists(faiss_index_path):
            logger.info("[RAG_SYSTEM_INIT] Force rebuild: deleting old index.")
            shutil.rmtree(faiss_index_path)

        rag = KnowledgeRAG(
            index_storage_dir=storage_dir_to_use,
            embedding_model_name=RAG_EMBEDDING_MODEL_NAME,
            use_gpu_for_embeddings=RAG_EMBEDDING_USE_GPU,
            chunk_size=RAG_CHUNK_SIZE,
            chunk_overlap=RAG_CHUNK_OVERLAP,
            reranker_model_name=RAG_RERANKER_MODEL_NAME,
            enable_reranker=RAG_RERANKER_ENABLED,
        )

        loaded = False
        if RAG_LOAD_INDEX_ON_STARTUP and not force_rebuild:
            if rag.chunk_config_has_changed():
                # Leaving `loaded` False routes execution to the build step below.
                logger.warning("[RAG_SYSTEM_INIT] Chunk config changed — forcing index rebuild.")
            else:
                try:
                    rag.load_index_from_disk()
                    loaded = True
                except Exception as e:
                    # Best effort: a failed load falls back to building from sources.
                    logger.warning("[RAG_SYSTEM_INIT] Load failed (%s). Building new.", e)

        if not loaded:
            if not os.path.exists(source_dir_to_use) or not os.listdir(source_dir_to_use):
                logger.warning("[RAG_SYSTEM_INIT] No sources found. System empty.")
            else:
                rag.build_index_from_source_files(source_dir_to_use)

        logger.info("[RAG_SYSTEM_INIT] Complete.")
        return rag

    except Exception as e:
        # Top-level boundary: log with traceback and signal failure via None.
        logger.critical("[RAG_SYSTEM_INIT] FATAL: %s", e, exc_info=True)
        return None