"""Central configuration constants for the application.

Groups the settings for the OpenRouter/DeepSeek API, the embedding model,
ChromaDB, document chunking, LLM response parameters, Tesseract/Poppler and
Firebase. Secrets are read from environment variables; for local runs they
may be supplied through a ``.env.local`` file located one directory above
the directory that contains this module.
"""

import logging
import os

_logger = logging.getLogger(__name__)

# --- Local environment loading ---
# python-dotenv is only used to read `.env.local` during local execution.
# When the package is not installed, the import is skipped and the values
# already present in the process environment are used as-is.
try:
    from dotenv import load_dotenv
except ImportError:
    _logger.info(
        "python-dotenv is not installed; skipping .env.local and relying on "
        "the process environment."
    )
else:
    # abspath() keeps the lookup correct even if __file__ is a relative path.
    _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    load_dotenv(dotenv_path=os.path.join(_PROJECT_ROOT, '.env.local'))

# --- OpenRouter DeepSeek API Configuration ---
# Your DeepSeek API key, fetched from environment variables.
# This should be set as a secret on Hugging Face Spaces.
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_R1_V3_API_KEY")
if DEEPSEEK_API_KEY:
    # Stray whitespace/newlines around the key would corrupt the header below.
    DEEPSEEK_API_KEY = DEEPSEEK_API_KEY.strip()
if not DEEPSEEK_API_KEY:
    # The header value is left unchanged for compatibility, but without a key
    # it carries no usable credential, so surface the problem at import time.
    _logger.warning(
        "DEEPSEEK_R1_V3_API_KEY is not set or is blank; DEEPSEEK_HEADERS "
        "will not contain a usable bearer token."
    )

# Base URL for the OpenRouter API.
DEEPSEEK_API_URL = 'https://openrouter.ai/api/v1/chat/completions'

# Headers required for OpenRouter API authentication.
DEEPSEEK_HEADERS = {
    'Authorization': f'Bearer {DEEPSEEK_API_KEY}',
    'Content-Type': 'application/json'
}

# --- Embedding Model Configuration ---
# Name of the Hugging Face model for embeddings.
EMBEDDING_MODEL_NAME = 'BAAI/bge-m3'
# Use float16 for reduced memory usage if supported by hardware (e.g., GPU).
# Set to False if encountering issues on CPU.
EMBEDDING_MODEL_USE_FP16 = True

# --- ChromaDB Configuration ---
# Directory where ChromaDB will persist its database files.
# This should be relative to your application's working directory.
CHROMADB_PERSIST_DIRECTORY = "./chroma_db"
# Name of the collection within ChromaDB where document chunks will be stored.
CHROMADB_COLLECTION_NAME = "pdf_documents_collection"

# --- Document Chunking Configuration ---
# Maximum size of text chunks for embedding and retrieval.
CHUNK_SIZE = 700
# Overlap between consecutive chunks to maintain context.
CHUNK_OVERLAP = 100

# --- LLM Response Parameters ---
# Temperature for the DeepSeek model. Lower values make output more deterministic.
LLM_TEMPERATURE = 0.5
# Maximum number of tokens the LLM can generate in a response.
LLM_MAX_TOKENS = 4096  # Adjusted to a more reasonable value for DeepSeek
# Max tokens for conversation history truncation (approximate, not exact token count).
# NOTE(review): 9192 may be a typo for 8192 - confirm before changing.
LLM_HISTORY_MAX_TOKENS = 9192

# --- Tesseract and Poppler Configuration (Docker/Deployment Specific) ---
# Environment variables set in Dockerfile for Tesseract; the second argument
# of each getenv() call is the fallback used when the variable is absent.
TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX", "/usr/share/tesseract-ocr/4.00/tessdata")
TESSERACT_CMD = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract")
# Path to Poppler's bin directory if not in system PATH (mostly for local Windows setup).
POPPLER_PATH = None  # e.g., r'C:\path\to\poppler\bin'

# --- Firebase Configuration (for Conversational Memory) ---
# Base64 encoded JSON string of your Firebase Service Account Key.
# This should be set as a secret on Hugging Face Spaces.
FIREBASE_CONFIG_BASE64 = os.getenv("FIREBASE_CONFIG_BASE64")