# NOTE(review): the original file header here was residue from a Hugging Face
# Spaces "Runtime error" page dump ("Spaces:" / "Runtime error"), not code.
"""Application configuration: API credentials, model, chunking, and path settings.

All values are module-level constants; secrets are read from environment
variables (set as secrets on Hugging Face Spaces, or via `.env.local` when
running locally).
"""

import os

# Load environment variables from `.env.local` for local testing.
# python-dotenv may be absent in production (on Hugging Face Spaces the
# secrets arrive as real environment variables), so the import is
# best-effort rather than a hard dependency of this config module.
try:
    from dotenv import load_dotenv

    # `.env.local` lives one directory above this file.
    load_dotenv(
        dotenv_path=os.path.join(
            os.path.dirname(os.path.dirname(__file__)), ".env.local"
        )
    )
except ImportError:
    pass

# --- OpenRouter DeepSeek API Configuration ---
# DeepSeek API key, fetched from environment variables.
# This should be set as a secret on Hugging Face Spaces.
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_R1_V3_API_KEY")
if DEEPSEEK_API_KEY:
    # Guard against stray whitespace/newlines pasted into the secret value.
    DEEPSEEK_API_KEY = DEEPSEEK_API_KEY.strip()

# Base URL for the OpenRouter chat-completions endpoint.
DEEPSEEK_API_URL = 'https://openrouter.ai/api/v1/chat/completions'

# Headers required for OpenRouter API authentication.
# NOTE(review): if DEEPSEEK_API_KEY is unset this becomes 'Bearer None';
# callers should validate the key before issuing requests.
DEEPSEEK_HEADERS = {
    'Authorization': f'Bearer {DEEPSEEK_API_KEY}',
    'Content-Type': 'application/json'
}

# --- Embedding Model Configuration ---
# Name of the Hugging Face model used for embeddings.
EMBEDDING_MODEL_NAME = 'BAAI/bge-m3'
# Use float16 for reduced memory usage if supported by hardware (e.g., GPU).
# Set to False if encountering issues on CPU.
EMBEDDING_MODEL_USE_FP16 = True

# --- ChromaDB Configuration ---
# Directory where ChromaDB persists its database files, relative to the
# application's working directory.
CHROMADB_PERSIST_DIRECTORY = "./chroma_db"
# Name of the collection within ChromaDB where document chunks are stored.
CHROMADB_COLLECTION_NAME = "pdf_documents_collection"

# --- Document Chunking Configuration ---
# Maximum size of text chunks for embedding and retrieval.
CHUNK_SIZE = 700
# Overlap between consecutive chunks to maintain context.
CHUNK_OVERLAP = 100

# --- LLM Response Parameters ---
# Temperature for the DeepSeek model. Lower values make output more deterministic.
LLM_TEMPERATURE = 0.5
# Maximum number of tokens the LLM can generate in a response.
LLM_MAX_TOKENS = 4096  # Adjusted to a more reasonable value for DeepSeek
# Max tokens for conversation-history truncation (approximate, not an exact
# token count).
LLM_HISTORY_MAX_TOKENS = 9192

# --- Tesseract and Poppler Configuration (Docker/Deployment Specific) ---
# Defaults match the paths set in the Dockerfile; overridable via env vars.
TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX", "/usr/share/tesseract-ocr/4.00/tessdata")
TESSERACT_CMD = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract")
# Path to Poppler's bin directory if not on the system PATH (mostly for a
# local Windows setup).
POPPLER_PATH = None  # e.g., r'C:\path\to\poppler\bin'

# --- Firebase Configuration (for Conversational Memory) ---
# Base64-encoded JSON string of the Firebase Service Account Key.
# This should be set as a secret on Hugging Face Spaces.
FIREBASE_CONFIG_BASE64 = os.getenv("FIREBASE_CONFIG_BASE64")