notebooklm-fast / config.py
jashdoshi77
feat: Add AI-powered query understanding with DeepSeek parsing
64deb3c
import os
from dotenv import load_dotenv
# Populate os.environ from a local .env file (if one is found) before the
# Config class below reads its settings via os.getenv at class-definition time.
load_dotenv()
class Config:
    """Central application settings, exposed as class attributes.

    API keys, the JWT secret and the Chroma tenant/database names are read
    from environment variables once, when this class body executes (i.e. at
    import time). Every other attribute is a hard-coded constant. Secrets
    fall back to an empty string when their variable is unset, so callers
    should treat '' as "not configured".
    """

    # --- DeepSeek API - PRIMARY (OpenAI-compatible) ---
    DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY', '')
    DEEPSEEK_BASE_URL = 'https://api.deepseek.com/v1'
    DEEPSEEK_MODEL = 'deepseek-chat'  # DeepSeek V3 model

    # --- OpenRouter API - for OCR and fallback LLM ---
    OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', '')
    OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1'

    # Short alias -> OpenRouter model identifier (fallback models).
    MODEL_MAP = {
        'gemma': 'google/gemma-3-4b-it:free',
        'mistral': 'mistralai/mistral-small-3.1-24b-instruct:free',
    }

    # Use DeepSeek first, then the OpenRouter models.
    USE_DEEPSEEK = True  # Set to False to use OpenRouter instead
    # Aliases (keys of MODEL_MAP) in the order they should be tried.
    FALLBACK_ORDER = ['gemma', 'mistral']

    # --- ChromaDB Cloud configuration ---
    CHROMA_API_KEY = os.getenv('CHROMA_API_KEY', '')
    CHROMA_TENANT = os.getenv('CHROMA_TENANT', 'default_tenant')
    CHROMA_DATABASE = os.getenv('CHROMA_DATABASE', 'default_database')
    CHROMA_HOST = 'api.trychroma.com'  # ChromaDB Cloud endpoint

    # --- JWT configuration ---
    # NOTE(review): the fallback below is a publicly visible placeholder.
    # Always set JWT_SECRET in the environment for any real deployment;
    # tokens signed with the placeholder can be forged by anyone.
    JWT_SECRET = os.getenv('JWT_SECRET', 'your-secret-key-change-in-production')
    JWT_EXPIRY_HOURS = 24

    # --- Upload configuration ---
    # Uploads live in an 'uploads' directory next to this file.
    UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')
    MAX_CONTENT_LENGTH = 200 * 1024 * 1024  # 200MB max file size
    # Accepted file extensions, lowercase and without the leading dot.
    ALLOWED_EXTENSIONS = {
        'pdf', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', 'txt', 'md',
        'png', 'jpg', 'jpeg', 'gif', 'webp',
    }

    # --- Chunking / retrieval tuning ---
    # NOTE(review): units (characters vs. tokens) are not visible from this
    # file; confirm against the chunker before changing these two values.
    CHUNK_SIZE = 500  # Smaller chunks for higher precision with many documents
    CHUNK_OVERLAP = 150  # Good overlap to avoid losing info at boundaries
    TOP_K_RESULTS = 100  # High - comprehensive retrieval for 3000+ docs across buckets
    # Per the original note a higher value is meant to include all potentially
    # relevant results. NOTE(review): confirm the comparison direction in the
    # retrieval code.
    RELEVANCE_THRESHOLD = 3.0

    # --- LLM call settings ---
    AI_TEMPERATURE = 0.0  # Zero temperature for maximum determinism and accuracy
    MAX_CONVERSATION_HISTORY = 20  # Remember more conversation for pronoun context
    AI_MAX_TOKENS = 4096  # Maximum tokens for detailed responses
    AI_TIMEOUT = 90  # More time for complex multi-document queries (presumably seconds)