File size: 2,875 Bytes
545620d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86eca65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import logging
import shutil
from typing import Optional

from rag_components import KnowledgeRAG
from utils import download_and_unzip_gdrive_folder
from config import (
    GDRIVE_SOURCES_ENABLED, GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR,
    RAG_STORAGE_PARENT_DIR, RAG_FAISS_INDEX_SUBDIR_NAME, RAG_LOAD_INDEX_ON_STARTUP,
    RAG_EMBEDDING_MODEL_NAME, RAG_EMBEDDING_USE_GPU, 
    RAG_CHUNK_SIZE, RAG_CHUNK_OVERLAP,
    RAG_RERANKER_MODEL_NAME, RAG_RERANKER_ENABLED
)

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

def initialize_and_get_rag_system(force_rebuild: bool = False, source_dir_override: Optional[str] = None) -> Optional[KnowledgeRAG]:
    """Create a ``KnowledgeRAG`` instance and load or build its index.

    The steps run in this order:

    1. Choose the source directory: ``source_dir_override`` when it is
       truthy, otherwise ``RAG_SOURCES_DIR``.
    2. If GDrive sources are enabled, a folder id/URL is configured and no
       override was given, remove ``RAG_SOURCES_DIR`` (when it exists) and
       call ``download_and_unzip_gdrive_folder`` to repopulate it.
    3. If ``force_rebuild`` is set and the index directory exists, delete it.
    4. Construct ``KnowledgeRAG`` from the configured settings. When
       ``RAG_LOAD_INDEX_ON_STARTUP`` is set, no rebuild is forced and
       ``chunk_config_has_changed()`` is false, try to load the index from
       disk; a load failure is logged and treated as "not loaded".
    5. If nothing was loaded, build the index from the source directory,
       unless that directory is missing or empty, in which case only a
       warning is logged.

    Args:
        force_rebuild: Delete any existing index directory and skip the
            load-from-disk attempt.
        source_dir_override: Directory to build from instead of
            ``RAG_SOURCES_DIR``. Supplying it also disables the GDrive
            refresh.

    Returns:
        The ``KnowledgeRAG`` instance, or ``None`` when an exception is
        raised while constructing it or building its index (the error is
        logged at CRITICAL level with a traceback).

    Raises:
        Any exception raised by the GDrive refresh or by deleting the old
        index directory: those steps run before the ``try`` block and are
        not caught here.
    """
    logger.info("[RAG_SYSTEM_INIT] Initializing...")
    sources_path = source_dir_override or RAG_SOURCES_DIR

    # GDrive refresh only applies to the default sources directory.
    gdrive_configured = GDRIVE_SOURCES_ENABLED and GDRIVE_FOLDER_ID_OR_URL
    if gdrive_configured and not source_dir_override:
        logger.info("[RAG_SYSTEM_INIT] Downloading sources from GDrive...")
        # Start from a clean directory so stale files do not survive the refresh.
        if os.path.exists(RAG_SOURCES_DIR):
            shutil.rmtree(RAG_SOURCES_DIR)
        download_and_unzip_gdrive_folder(GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR)

    index_dir = os.path.join(RAG_STORAGE_PARENT_DIR, RAG_FAISS_INDEX_SUBDIR_NAME)
    if force_rebuild:
        if os.path.exists(index_dir):
            logger.info("[RAG_SYSTEM_INIT] Force rebuild: deleting old index.")
            shutil.rmtree(index_dir)

    try:
        engine = KnowledgeRAG(
            index_storage_dir=RAG_STORAGE_PARENT_DIR,
            embedding_model_name=RAG_EMBEDDING_MODEL_NAME,
            use_gpu_for_embeddings=RAG_EMBEDDING_USE_GPU,
            chunk_size=RAG_CHUNK_SIZE,
            chunk_overlap=RAG_CHUNK_OVERLAP,
            reranker_model_name=RAG_RERANKER_MODEL_NAME,
            enable_reranker=RAG_RERANKER_ENABLED,
        )

        index_ready = False
        may_load = RAG_LOAD_INDEX_ON_STARTUP and not force_rebuild
        if may_load and engine.chunk_config_has_changed():
            # The stored index was built with different chunk settings.
            logger.warning("[RAG_SYSTEM_INIT] Chunk config changed — forcing index rebuild.")
        elif may_load:
            try:
                engine.load_index_from_disk()
            except Exception as load_err:
                # A failed load is not fatal: fall through to a fresh build.
                logger.warning(f"[RAG_SYSTEM_INIT] Load failed ({load_err}). Building new.")
            else:
                index_ready = True

        if not index_ready:
            # listdir is only reached when the path exists (short-circuit).
            has_sources = os.path.exists(sources_path) and os.listdir(sources_path)
            if has_sources:
                engine.build_index_from_source_files(sources_path)
            else:
                logger.warning("[RAG_SYSTEM_INIT] No sources found. System empty.")

        logger.info("[RAG_SYSTEM_INIT] Complete.")
        return engine

    except Exception as init_err:
        logger.critical(f"[RAG_SYSTEM_INIT] FATAL: {init_err}", exc_info=True)
        return None