Spaces:
Running
Running
| import os | |
| import logging | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
# --- Logging Setup ---
logger = logging.getLogger(__name__)
if not logger.handlers:
    # logging.basicConfig() does nothing when the root logger already has
    # handlers, so an application that configured logging first keeps its setup.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    )

_MODULE_BASE_DIR = os.path.dirname(os.path.abspath(__file__))


# --- Environment parsing helpers ---
# Spellings (after strip + lower) that switch a boolean flag on.
_TRUTHY_VALUES = frozenset({"1", "true", "yes", "on"})


def _env_lookup(name, *fallback_names):
    """Find the first non-blank value among *name* and *fallback_names*.

    Returns a ``(variable_name, value)`` pair. The value is stripped of
    surrounding whitespace; it is ``None`` (paired with *name*) when every
    candidate is unset or blank.
    """
    for candidate in (name, *fallback_names):
        raw = os.getenv(candidate)
        if raw is not None and raw.strip():
            return candidate, raw.strip()
    return name, None


def _env_str(name: str, default: str, *fallback_names: str) -> str:
    """Return a string setting, treating a blank variable like an unset one."""
    _, raw = _env_lookup(name, *fallback_names)
    return default if raw is None else raw


def _env_bool(name: str, default: bool = False) -> bool:
    """Return a boolean flag read from the environment variable *name*.

    An unset variable yields *default*. A set variable is true only when its
    stripped, lower-cased value is one of ``1``, ``true``, ``yes`` or ``on``;
    anything else, including an empty string, is false.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUTHY_VALUES


def _env_number(caster, name, default, fallback_names):
    """Parse a numeric setting with *caster*, falling back to *default*.

    A malformed value is reported at WARNING level (naming the offending
    variable) and replaced by *default*, so a typo in the environment does not
    abort the import of this module.
    """
    source, raw = _env_lookup(name, *fallback_names)
    if raw is None:
        return default
    try:
        return caster(raw)
    except ValueError:
        logger.warning(
            "Invalid value %r for %s; using default %r", raw, source, default
        )
        return default


def _env_int(name: str, default: int, *fallback_names: str) -> int:
    """Return an integer setting; blank or malformed values yield *default*."""
    return _env_number(int, name, default, fallback_names)


def _env_float(name: str, default: float, *fallback_names: str) -> float:
    """Return a float setting; blank or malformed values yield *default*."""
    return _env_number(float, name, default, fallback_names)


# API Authentication for n8n (Basic Auth)
API_USERNAME = os.getenv("API_USERNAME", "admin")
API_PASSWORD = os.getenv("API_PASSWORD", "password")

# Admin fallback credentials for dashboard (used when users.csv is missing or unavailable)
ADMIN_USERNAME = os.getenv('FLASK_ADMIN_USERNAME', 'admin')
ADMIN_PASSWORD = os.getenv('FLASK_ADMIN_PASSWORD', '1234')

# URL Fetcher configs (Generalized from Rentry)
# The RENTRY_* names are the legacy spellings and are consulted only when the
# generalized variable is unset or blank.
URL_FETCH_ENABLED = _env_bool("URL_FETCH_ENABLED", False)
EXTERNAL_URL = _env_str("EXTERNAL_URL", "", "RENTRY_URL")
URL_UPDATE_PERIOD_MINUTES = _env_int(
    "URL_UPDATE_PERIOD_MINUTES", 60, "RENTRY_UPDATE_PERIOD_MINUTES"
)

# CSV Configuration
RAG_CSV_MAX_RESULTS = _env_int("RAG_CSV_MAX_RESULTS", 5)
RAG_CSV_CONFIDENCE_THRESHOLD = _env_float("RAG_CSV_CONFIDENCE_THRESHOLD", 0.5)

RAG_FAISS_INDEX_SUBDIR_NAME = "faiss_index"
RAG_STORAGE_PARENT_DIR = _env_str(
    "RAG_STORAGE_DIR", os.path.join(_MODULE_BASE_DIR, "faiss_storage")
)
RAG_SOURCES_DIR = _env_str("SOURCES_DIR", os.path.join(_MODULE_BASE_DIR, "sources"))
RAG_CHUNKED_SOURCES_FILENAME = "pre_chunked_sources.json"

# Both directories are created at import time so later code can assume they exist.
os.makedirs(RAG_SOURCES_DIR, exist_ok=True)
os.makedirs(RAG_STORAGE_PARENT_DIR, exist_ok=True)

# Embedding and model configuration
RAG_EMBEDDING_MODEL_NAME = _env_str("RAG_EMBEDDING_MODEL", "BAAI/bge-small-en")
RAG_EMBEDDING_USE_GPU = _env_bool("RAG_EMBEDDING_GPU", False)
RAG_LOAD_INDEX_ON_STARTUP = _env_bool("RAG_LOAD_INDEX", True)

# Retrieval Settings
RAG_INITIAL_FETCH_K = _env_int("RAG_INITIAL_FETCH_K", 20)
RAG_RERANKER_K = _env_int("RAG_RERANKER_K", 5)
RAG_MAX_FILES_FOR_INCREMENTAL = _env_int("RAG_MAX_FILES_FOR_INCREMENTAL", 50)

# Chunk configuration
RAG_CHUNK_SIZE = _env_int("RAG_CHUNK_SIZE", 2000)
RAG_CHUNK_OVERLAP = _env_int("RAG_CHUNK_OVERLAP", 150)

# Reranker configuration
RAG_RERANKER_MODEL_NAME = _env_str(
    "RAG_RERANKER_MODEL", "jinaai/jina-reranker-v2-base-multilingual"
)
RAG_RERANKER_ENABLED = _env_bool("RAG_RERANKER_ENABLED", True)

# GDrive configuration for RAG sources
GDRIVE_SOURCES_ENABLED = _env_bool("GDRIVE_SOURCES_ENABLED", False)
GDRIVE_FOLDER_ID_OR_URL = os.getenv("GDRIVE_FOLDER_URL")

# GDrive configuration for downloading a pre-built FAISS index
GDRIVE_INDEX_ENABLED = _env_bool("GDRIVE_INDEX_ENABLED", False)
GDRIVE_INDEX_ID_OR_URL = os.getenv("GDRIVE_INDEX_URL")

# GDrive configuration for downloading users.csv
GDRIVE_USERS_CSV_ENABLED = _env_bool("GDRIVE_USERS_CSV_ENABLED", False)
GDRIVE_USERS_CSV_ID_OR_URL = os.getenv("GDRIVE_USERS_CSV_URL")

RAG_DETAILED_LOGGING = _env_bool("RAG_DETAILED_LOGGING", True)


def _warn_on_risky_config():
    """Log warnings for settings that are usable but probably unintended."""
    # Only the variable name is logged, never the credential itself.
    for variable in ("API_PASSWORD", "FLASK_ADMIN_PASSWORD"):
        if os.getenv(variable) is None:
            logger.warning(
                "%s is not set; falling back to the insecure built-in default",
                variable,
            )
    # A download that is switched on without a target has nothing to fetch.
    gdrive_targets = (
        ("GDRIVE_SOURCES_ENABLED", GDRIVE_SOURCES_ENABLED,
         "GDRIVE_FOLDER_URL", GDRIVE_FOLDER_ID_OR_URL),
        ("GDRIVE_INDEX_ENABLED", GDRIVE_INDEX_ENABLED,
         "GDRIVE_INDEX_URL", GDRIVE_INDEX_ID_OR_URL),
        ("GDRIVE_USERS_CSV_ENABLED", GDRIVE_USERS_CSV_ENABLED,
         "GDRIVE_USERS_CSV_URL", GDRIVE_USERS_CSV_ID_OR_URL),
    )
    for flag_name, enabled, url_name, url_value in gdrive_targets:
        if enabled and not url_value:
            logger.warning("%s is enabled but %s is not set", flag_name, url_name)


def _log_config_summary():
    """Emit the effective configuration at INFO level."""
    logger.info(
        "RAG Config Loaded - Chunk Size: %s, Chunk Overlap: %s",
        RAG_CHUNK_SIZE, RAG_CHUNK_OVERLAP,
    )
    logger.info("Embedding Model: %s", RAG_EMBEDDING_MODEL_NAME)
    logger.info("Reranker Model: %s", RAG_RERANKER_MODEL_NAME)
    logger.info(
        "Retrieval Pipeline: Initial Fetch K=%s, Reranker Final K=%s",
        RAG_INITIAL_FETCH_K, RAG_RERANKER_K,
    )
    logger.info(
        "CSV Filters: Max Results=%s, Threshold=%s",
        RAG_CSV_MAX_RESULTS, RAG_CSV_CONFIDENCE_THRESHOLD,
    )
    feature_flags = (
        ("URL Fetching", URL_FETCH_ENABLED),
        ("Detailed Logging", RAG_DETAILED_LOGGING),
        ("GDrive Sources Download", GDRIVE_SOURCES_ENABLED),
        ("GDrive Pre-built Index Download", GDRIVE_INDEX_ENABLED),
        ("GDrive users.csv Download", GDRIVE_USERS_CSV_ENABLED),
    )
    for label, enabled in feature_flags:
        logger.info("%s: %s", label, "ENABLED" if enabled else "DISABLED")


_warn_on_risky_config()
_log_config_summary()