import os
import logging
import shutil
from typing import Optional

from rag_components import KnowledgeRAG
from utils import download_and_unzip_gdrive_folder
from config import (
    GDRIVE_SOURCES_ENABLED,
    GDRIVE_FOLDER_ID_OR_URL,
    RAG_SOURCES_DIR,
    RAG_STORAGE_PARENT_DIR,
    RAG_FAISS_INDEX_SUBDIR_NAME,
    RAG_LOAD_INDEX_ON_STARTUP,
    RAG_EMBEDDING_MODEL_NAME,
    RAG_EMBEDDING_USE_GPU,
    RAG_CHUNK_SIZE,
    RAG_CHUNK_OVERLAP,
    RAG_RERANKER_MODEL_NAME,
    RAG_RERANKER_ENABLED,
)

logger = logging.getLogger(__name__)


def initialize_and_get_rag_system(
    force_rebuild: bool = False,
    source_dir_override: Optional[str] = None,
    storage_dir_override: Optional[str] = None,
) -> Optional[KnowledgeRAG]:
    """Create a ``KnowledgeRAG`` instance and load or build its index.

    Steps, in order:

    1. Optionally refresh ``RAG_SOURCES_DIR`` from Google Drive.
    2. When ``force_rebuild`` is set, delete the existing index directory.
    3. Construct ``KnowledgeRAG`` from the ``RAG_*`` configuration values.
    4. Load the index from disk when allowed; otherwise build it from the
       source directory, provided that directory exists and is non-empty.

    Args:
        force_rebuild: If true, remove the on-disk index directory (when it
            exists) and skip the load-from-disk attempt.
        source_dir_override: Directory to read source files from instead of
            ``RAG_SOURCES_DIR``. A truthy value also disables the Google
            Drive download.
        storage_dir_override: Parent directory for index storage, used
            instead of ``RAG_STORAGE_PARENT_DIR``.

    Returns:
        The ``KnowledgeRAG`` instance. It is returned without a build step
        when no index was loaded and no source files were found. ``None`` is
        returned if any step raised; the error is logged at CRITICAL level
        with a traceback.
    """
    logger.info("[RAG_SYSTEM_INIT] Initializing...")

    source_dir_to_use = source_dir_override or RAG_SOURCES_DIR
    storage_dir_to_use = storage_dir_override or RAG_STORAGE_PARENT_DIR

    # Everything that touches the filesystem or the network lives inside one
    # try block, so every failure (including the GDrive download and the
    # rmtree calls) is reported the same way: CRITICAL log and a None return.
    try:
        # GDrive refresh only applies to the default source directory; an
        # explicit override means the caller supplies the sources.
        if GDRIVE_SOURCES_ENABLED and not source_dir_override and GDRIVE_FOLDER_ID_OR_URL:
            logger.info("[RAG_SYSTEM_INIT] Downloading sources from GDrive...")
            # NOTE: the existing directory is removed before the download
            # starts, so a failed download leaves no local sources behind.
            if os.path.exists(RAG_SOURCES_DIR):
                shutil.rmtree(RAG_SOURCES_DIR)
            download_and_unzip_gdrive_folder(GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR)

        faiss_index_path = os.path.join(storage_dir_to_use, RAG_FAISS_INDEX_SUBDIR_NAME)
        if force_rebuild and os.path.exists(faiss_index_path):
            logger.info("[RAG_SYSTEM_INIT] Force rebuild: deleting old index.")
            shutil.rmtree(faiss_index_path)

        rag = KnowledgeRAG(
            index_storage_dir=storage_dir_to_use,
            embedding_model_name=RAG_EMBEDDING_MODEL_NAME,
            use_gpu_for_embeddings=RAG_EMBEDDING_USE_GPU,
            chunk_size=RAG_CHUNK_SIZE,
            chunk_overlap=RAG_CHUNK_OVERLAP,
            reranker_model_name=RAG_RERANKER_MODEL_NAME,
            enable_reranker=RAG_RERANKER_ENABLED,
        )

        loaded = False
        if RAG_LOAD_INDEX_ON_STARTUP and not force_rebuild:
            if rag.chunk_config_has_changed():
                # Leaving `loaded` false sends control to the build step below.
                logger.warning("[RAG_SYSTEM_INIT] Chunk config changed — forcing index rebuild.")
            else:
                try:
                    rag.load_index_from_disk()
                    loaded = True
                except Exception as e:
                    # A failed load is recoverable: fall back to building.
                    logger.warning("[RAG_SYSTEM_INIT] Load failed (%s). Building new.", e)

        if not loaded:
            if not os.path.exists(source_dir_to_use) or not os.listdir(source_dir_to_use):
                logger.warning("[RAG_SYSTEM_INIT] No sources found. System empty.")
            else:
                rag.build_index_from_source_files(source_dir_to_use)

        logger.info("[RAG_SYSTEM_INIT] Complete.")
        return rag
    except Exception as e:
        logger.critical("[RAG_SYSTEM_INIT] FATAL: %s", e, exc_info=True)
        return None