# NOTE(review): the original file header here was residue from a Hugging Face
# Spaces "Runtime error" page dump ("Spaces:" / "Runtime error"), not code.
"""Application configuration: API credentials, model, chunking, and path settings.

All values are module-level constants; secrets are read from environment
variables (set as secrets on Hugging Face Spaces, or via `.env.local` when
running locally).
"""

import os

# Load environment variables from `.env.local` for local testing.
# python-dotenv may be absent in production (on Hugging Face Spaces the
# secrets arrive as real environment variables), so the import is
# best-effort rather than a hard dependency of this config module.
try:
    from dotenv import load_dotenv

    # `.env.local` lives one directory above this file.
    load_dotenv(
        dotenv_path=os.path.join(
            os.path.dirname(os.path.dirname(__file__)), ".env.local"
        )
    )
except ImportError:
    pass

# --- OpenRouter DeepSeek API Configuration ---
# DeepSeek API key, fetched from environment variables.
# This should be set as a secret on Hugging Face Spaces.
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_R1_V3_API_KEY")
if DEEPSEEK_API_KEY:
    # Guard against stray whitespace/newlines pasted into the secret value.
    DEEPSEEK_API_KEY = DEEPSEEK_API_KEY.strip()

# Base URL for the OpenRouter chat-completions endpoint.
DEEPSEEK_API_URL = 'https://openrouter.ai/api/v1/chat/completions'

# Headers required for OpenRouter API authentication.
# NOTE(review): if DEEPSEEK_API_KEY is unset this becomes 'Bearer None';
# callers should validate the key before issuing requests.
DEEPSEEK_HEADERS = {
    'Authorization': f'Bearer {DEEPSEEK_API_KEY}',
    'Content-Type': 'application/json'
}

# --- Embedding Model Configuration ---
# Name of the Hugging Face model used for embeddings.
EMBEDDING_MODEL_NAME = 'BAAI/bge-m3'
# Use float16 for reduced memory usage if supported by hardware (e.g., GPU).
# Set to False if encountering issues on CPU.
EMBEDDING_MODEL_USE_FP16 = True

# --- ChromaDB Configuration ---
# Directory where ChromaDB persists its database files, relative to the
# application's working directory.
CHROMADB_PERSIST_DIRECTORY = "./chroma_db"
# Name of the collection within ChromaDB where document chunks are stored.
CHROMADB_COLLECTION_NAME = "pdf_documents_collection"

# --- Document Chunking Configuration ---
# Maximum size of text chunks for embedding and retrieval.
CHUNK_SIZE = 700
# Overlap between consecutive chunks to maintain context.
CHUNK_OVERLAP = 100

# --- LLM Response Parameters ---
# Temperature for the DeepSeek model. Lower values make output more deterministic.
LLM_TEMPERATURE = 0.5
# Maximum number of tokens the LLM can generate in a response.
LLM_MAX_TOKENS = 4096  # Adjusted to a more reasonable value for DeepSeek
# Max tokens for conversation-history truncation (approximate, not an exact
# token count).
LLM_HISTORY_MAX_TOKENS = 9192

# --- Tesseract and Poppler Configuration (Docker/Deployment Specific) ---
# Defaults match the paths set in the Dockerfile; overridable via env vars.
TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX", "/usr/share/tesseract-ocr/4.00/tessdata")
TESSERACT_CMD = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract")
# Path to Poppler's bin directory if not on the system PATH (mostly for a
# local Windows setup).
POPPLER_PATH = None  # e.g., r'C:\path\to\poppler\bin'

# --- Firebase Configuration (for Conversational Memory) ---
# Base64-encoded JSON string of the Firebase Service Account Key.
# This should be set as a secret on Hugging Face Spaces.
FIREBASE_CONFIG_BASE64 = os.getenv("FIREBASE_CONFIG_BASE64")