AI-Agent-RAG-Bot-Test / rag_system.py
SakibAhmed's picture
Upload 14 files
ca6e669 verified
import os
import logging
import shutil
from typing import Optional
from rag_components import KnowledgeRAG
from utils import download_and_unzip_gdrive_folder
from config import (
GDRIVE_SOURCES_ENABLED, GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR,
RAG_STORAGE_PARENT_DIR, RAG_FAISS_INDEX_SUBDIR_NAME, RAG_LOAD_INDEX_ON_STARTUP,
RAG_EMBEDDING_MODEL_NAME, RAG_EMBEDDING_USE_GPU,
RAG_CHUNK_SIZE, RAG_CHUNK_OVERLAP,
RAG_RERANKER_MODEL_NAME, RAG_RERANKER_ENABLED
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def initialize_and_get_rag_system(
    force_rebuild: bool = False,
    source_dir_override: Optional[str] = None,
    storage_dir_override: Optional[str] = None,
) -> Optional[KnowledgeRAG]:
    """Create a KnowledgeRAG instance and load or build its index.

    Steps, in order:

    1. Resolve the source and storage directories (override if given,
       otherwise ``RAG_SOURCES_DIR`` / ``RAG_STORAGE_PARENT_DIR``).
    2. If GDrive sources are enabled, a folder id/URL is configured and no
       source override was passed, delete ``RAG_SOURCES_DIR`` and download
       the GDrive folder into it.
    3. If ``force_rebuild`` is set, delete the existing index directory.
    4. Construct ``KnowledgeRAG`` from the configured settings.
    5. Unless a rebuild is forced, try to load the index from disk when
       ``RAG_LOAD_INDEX_ON_STARTUP`` is set and the chunk configuration has
       not changed.
    6. If nothing was loaded, build the index from the source directory, or
       leave the system empty when that directory is missing or has no
       entries.

    Args:
        force_rebuild: Delete any stored index and skip loading from disk.
        source_dir_override: Source directory to use instead of
            ``RAG_SOURCES_DIR``. Passing it also disables the GDrive download.
        storage_dir_override: Storage parent directory to use instead of
            ``RAG_STORAGE_PARENT_DIR``.

    Returns:
        The initialized ``KnowledgeRAG`` (possibly with no index when no
        sources were found), or ``None`` if constructing, loading or building
        raised an exception.

    Note:
        The GDrive download and the forced index deletion run before the
        ``try`` block, so exceptions raised there propagate to the caller
        instead of producing a ``None`` return.
    """
    logger.info("[RAG_SYSTEM_INIT] Initializing...")

    source_dir_to_use = source_dir_override or RAG_SOURCES_DIR
    storage_dir_to_use = storage_dir_override or RAG_STORAGE_PARENT_DIR

    # GDrive Logic: only refresh the default sources dir; an explicit
    # override means the caller supplies its own sources.
    if GDRIVE_SOURCES_ENABLED and not source_dir_override and GDRIVE_FOLDER_ID_OR_URL:
        logger.info("[RAG_SYSTEM_INIT] Downloading sources from GDrive...")
        # The previous sources are removed before the download starts.
        if os.path.exists(RAG_SOURCES_DIR):
            shutil.rmtree(RAG_SOURCES_DIR)
        download_and_unzip_gdrive_folder(GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR)

    faiss_index_path = os.path.join(storage_dir_to_use, RAG_FAISS_INDEX_SUBDIR_NAME)
    if force_rebuild and os.path.exists(faiss_index_path):
        logger.info("[RAG_SYSTEM_INIT] Force rebuild: deleting old index.")
        shutil.rmtree(faiss_index_path)

    try:
        rag = KnowledgeRAG(
            index_storage_dir=storage_dir_to_use,
            embedding_model_name=RAG_EMBEDDING_MODEL_NAME,
            use_gpu_for_embeddings=RAG_EMBEDDING_USE_GPU,
            chunk_size=RAG_CHUNK_SIZE,
            chunk_overlap=RAG_CHUNK_OVERLAP,
            reranker_model_name=RAG_RERANKER_MODEL_NAME,
            enable_reranker=RAG_RERANKER_ENABLED,
        )

        loaded = False
        if RAG_LOAD_INDEX_ON_STARTUP and not force_rebuild:
            if rag.chunk_config_has_changed():
                # Leave `loaded` False so the build branch below runs.
                logger.warning("[RAG_SYSTEM_INIT] Chunk config changed — forcing index rebuild.")
            else:
                try:
                    rag.load_index_from_disk()
                    loaded = True
                except Exception as e:
                    # Best effort: a failed load falls through to a rebuild.
                    logger.warning("[RAG_SYSTEM_INIT] Load failed (%s). Building new.", e)

        if not loaded:
            # isdir (not exists): a path that exists but is not a directory
            # would make os.listdir raise and turn a plain "no sources"
            # situation into a fatal initialization failure.
            if not os.path.isdir(source_dir_to_use) or not os.listdir(source_dir_to_use):
                logger.warning("[RAG_SYSTEM_INIT] No sources found. System empty.")
            else:
                rag.build_index_from_source_files(source_dir_to_use)

        logger.info("[RAG_SYSTEM_INIT] Complete.")
        return rag
    except Exception as e:
        # Top-level boundary: log with traceback and signal failure via None.
        logger.critical("[RAG_SYSTEM_INIT] FATAL: %s", e, exc_info=True)
        return None