# API CONFIGURATION
# URL where the API can be accessed by the browser
# This setting allows the frontend to connect to the API at runtime (no rebuild needed!)
#
# IMPORTANT: Do NOT include /api at the end - it will be added automatically!
#
# Common scenarios:
# - Docker on localhost: http://localhost:5055 (default, works for most cases)
# - Docker on LAN/remote server: http://192.168.1.100:5055 or http://your-server-ip:5055
# - Behind reverse proxy with custom domain: https://your-domain.com
# - Behind reverse proxy with subdomain: https://api.your-domain.com
#
# Examples for reverse proxy users:
# - API_URL=https://notebook.example.com (frontend will call https://notebook.example.com/api/*)
# - API_URL=https://api.example.com (frontend will call https://api.example.com/api/*)
#
# Note: If not set, the system will auto-detect based on the incoming request.
# Only set this if you need to override the auto-detection (e.g., reverse proxy scenarios).
API_URL=http://localhost:5055
# INTERNAL API URL (Server-Side)
# URL that the Next.js server proxies API requests to (via rewrites)
# This is DIFFERENT from API_URL, which is used by the browser client
#
# INTERNAL_API_URL is used by Next.js rewrites to forward /api/* requests to the FastAPI backend
# API_URL is used by the browser to know where to make API calls
#
# Default: http://localhost:5055 (single-container deployment - both services on same host)
# Override for multi-container: INTERNAL_API_URL=http://api-service:5055
#
# Common scenarios:
# - Single container (default): Don't set - defaults to http://localhost:5055
# - Multi-container Docker Compose: INTERNAL_API_URL=http://api:5055 (use service name)
# - Kubernetes/advanced networking: INTERNAL_API_URL=http://api-service.namespace.svc.cluster.local:5055
#
# Why two variables?
# - API_URL: External/public URL that browsers use (can be https://your-domain.com)
# - INTERNAL_API_URL: Internal container networking URL (usually http://localhost:5055 or service name)
#
# INTERNAL_API_URL=http://localhost:5055
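#
# Example (illustrative only): a multi-container docker-compose deployment behind a
# reverse proxy. The domain and the "api" service name below are assumptions - use
# your own domain and whatever service name your compose file defines.
# API_URL=https://notebook.example.com
# INTERNAL_API_URL=http://api:5055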
# API CLIENT TIMEOUT (in seconds)
# Controls how long the frontend/Streamlit UI waits for API responses
# Increase this if you're using slow AI providers or hardware (Ollama on CPU, remote LM Studio, etc.)
# Default: 300 seconds (5 minutes) - sufficient for most transformation/insight operations
#
# Common scenarios:
# - Fast cloud APIs (OpenAI, Anthropic): 300 seconds is more than enough
# - Local Ollama on GPU: 300 seconds should work fine
# - Local Ollama on CPU: Consider 600 seconds (10 minutes) or more
# - Remote LM Studio over slow network: Consider 900 seconds (15 minutes)
# - Very large documents: May need 900+ seconds
#
# API_CLIENT_TIMEOUT=300
# ESPERANTO LLM TIMEOUT (in seconds)
# Controls the timeout for AI model API calls at the Esperanto library level
# This is separate from API_CLIENT_TIMEOUT and applies to the actual LLM provider requests
# Only increase this if you're experiencing timeouts during model inference itself
# Default: 60 seconds (built into Esperanto)
#
# Important: This should generally be LOWER than API_CLIENT_TIMEOUT to allow proper error handling
#
# Common scenarios:
# - Fast cloud APIs (OpenAI, Anthropic, Groq): 60 seconds is sufficient
# - Local Ollama with small models: 120-180 seconds may help
# - Local Ollama with large models on CPU: 300+ seconds
# - Remote or self-hosted LLMs: 180-300 seconds depending on hardware
#
# Note: If transformations complete but you see timeout errors, increase API_CLIENT_TIMEOUT first.
# Only increase ESPERANTO_LLM_TIMEOUT if the model itself is timing out during inference.
#
# ESPERANTO_LLM_TIMEOUT=60
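#
# Example (illustrative only): a setup for local Ollama running on CPU, where model
# inference itself is slow. The values are assumptions to adapt to your hardware; note
# that ESPERANTO_LLM_TIMEOUT stays below API_CLIENT_TIMEOUT as recommended above.
# API_CLIENT_TIMEOUT=900
# ESPERANTO_LLM_TIMEOUT=300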
# SSL VERIFICATION CONFIGURATION
# Configure SSL certificate verification for local AI providers (Ollama, LM Studio, etc.)
# behind reverse proxies with self-signed certificates
#
# Option 1: Custom CA Bundle (recommended for self-signed certs)
# Point to your CA certificate file to verify SSL while using custom certificates
# ESPERANTO_SSL_CA_BUNDLE=/path/to/your/ca-bundle.pem
#
# Option 2: Disable SSL Verification (development only)
# WARNING: Disabling SSL verification exposes you to man-in-the-middle attacks
# Only use in trusted development/testing environments
# ESPERANTO_SSL_VERIFY=false
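#
# Example (illustrative only): Ollama served over HTTPS behind a reverse proxy with a
# self-signed certificate. The hostname and CA bundle path are assumptions - point them
# at your own proxy and certificate file.
# OLLAMA_API_BASE="https://ollama.internal.example"
# ESPERANTO_SSL_CA_BUNDLE=/etc/ssl/certs/internal-ca.pem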
# SECURITY
# Set this to protect your Open Notebook instance with a password (for public hosting)
# OPEN_NOTEBOOK_PASSWORD=
# OPENAI
# OPENAI_API_KEY=
# ANTHROPIC
# ANTHROPIC_API_KEY=
# GEMINI
# Gemini is recommended for long context and podcast generation
# GOOGLE_API_KEY=
# GEMINI_API_BASE_URL= # Optional: Override default endpoint (for Vertex AI, proxies, etc.)
# VERTEXAI
# VERTEX_PROJECT=my-google-cloud-project-name
# GOOGLE_APPLICATION_CREDENTIALS=./google-credentials.json
# VERTEX_LOCATION=us-east5
# MISTRAL
# MISTRAL_API_KEY=
# DEEPSEEK
# DEEPSEEK_API_KEY=
# OLLAMA
# OLLAMA_API_BASE="http://10.20.30.20:11434"
# OPEN ROUTER
# OPENROUTER_BASE_URL="https://openrouter.ai/api/v1"
# OPENROUTER_API_KEY=
# GROQ
# GROQ_API_KEY=
# XAI
# XAI_API_KEY=
# ELEVENLABS
# Used only by the podcast feature
# ELEVENLABS_API_KEY=
# TTS BATCH SIZE
# Controls concurrent TTS requests for podcast generation (default: 5)
# Lower values reduce provider load but increase generation time
# Recommended: OpenAI=5, ElevenLabs=2, Google=4, Custom=1
# TTS_BATCH_SIZE=2
# VOYAGE AI
# VOYAGE_API_KEY=
# OPENAI COMPATIBLE ENDPOINTS
# Generic configuration (applies to all modalities: language, embedding, STT, TTS)
# OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
# OPENAI_COMPATIBLE_API_KEY=
# Mode-specific configuration (overrides generic if set)
# Use these when you want different endpoints for different capabilities
# OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
# OPENAI_COMPATIBLE_API_KEY_LLM=
# OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
# OPENAI_COMPATIBLE_API_KEY_EMBEDDING=
# OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
# OPENAI_COMPATIBLE_API_KEY_STT=
# OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
# OPENAI_COMPATIBLE_API_KEY_TTS=
# AZURE OPENAI
# Generic configuration (applies to all modalities: language, embedding, STT, TTS)
# AZURE_OPENAI_API_KEY=
# AZURE_OPENAI_ENDPOINT=
# AZURE_OPENAI_API_VERSION=2024-12-01-preview
# Mode-specific configuration (overrides generic if set)
# Use these when you want different deployments for different AI capabilities
# AZURE_OPENAI_API_KEY_LLM=
# AZURE_OPENAI_ENDPOINT_LLM=
# AZURE_OPENAI_API_VERSION_LLM=
# AZURE_OPENAI_API_KEY_EMBEDDING=
# AZURE_OPENAI_ENDPOINT_EMBEDDING=
# AZURE_OPENAI_API_VERSION_EMBEDDING=
# AZURE_OPENAI_API_KEY_STT=
# AZURE_OPENAI_ENDPOINT_STT=
# AZURE_OPENAI_API_VERSION_STT=
# AZURE_OPENAI_API_KEY_TTS=
# AZURE_OPENAI_ENDPOINT_TTS=
# AZURE_OPENAI_API_VERSION_TTS=
# USE THIS IF YOU WANT TO DEBUG THE APP ON LANGSMITH
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY=
# LANGCHAIN_PROJECT="Open Notebook"
# CONNECTION DETAILS FOR YOUR SURREAL DB
# New format (preferred) - WebSocket URL (ws://<host>:<port>/rpc)
SURREAL_URL="ws://surrealdb:8000/rpc"
SURREAL_USER="root"
SURREAL_PASSWORD="root"
SURREAL_NAMESPACE="open_notebook"
SURREAL_DATABASE="staging"
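#
# Example (illustrative only): connecting to a SurrealDB instance running on another
# host. The IP address below is a placeholder for your own server.
# SURREAL_URL="ws://192.168.1.50:8000/rpc"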
# RETRY CONFIGURATION (surreal-commands v1.2.0+)
# Global defaults for all background commands unless explicitly overridden at command level
# These settings help commands automatically recover from transient failures like:
# - Database transaction conflicts during concurrent operations
# - Network timeouts when calling external APIs
# - Rate limits from LLM/embedding providers
# - Temporary resource unavailability
# Enable/disable retry globally (default: true)
# Set to false to disable retries for all commands (useful for debugging)
SURREAL_COMMANDS_RETRY_ENABLED=true
# Maximum retry attempts before giving up (default: 3)
# Database operations use 5 attempts (defined per-command)
# API calls use 3 attempts (defined per-command)
SURREAL_COMMANDS_RETRY_MAX_ATTEMPTS=3
# Wait strategy between retry attempts (default: exponential_jitter)
# Options: exponential_jitter, exponential, fixed, random
# - exponential_jitter: Recommended - prevents thundering herd during DB conflicts
# - exponential: Good for API rate limits (predictable backoff)
# - fixed: Use for quick recovery scenarios
# - random: Use when you want unpredictable retry timing
SURREAL_COMMANDS_RETRY_WAIT_STRATEGY=exponential_jitter
# Minimum wait time between retries in seconds (default: 1)
# Database conflicts: 1 second (fast retry for transient issues)
# API rate limits: 5 seconds (wait for quota reset)
SURREAL_COMMANDS_RETRY_WAIT_MIN=1
# Maximum wait time between retries in seconds (default: 30)
# Database conflicts: 30 seconds maximum
# API rate limits: 120 seconds maximum (defined per-command)
# Total retry time won't exceed max_attempts * wait_max
SURREAL_COMMANDS_RETRY_WAIT_MAX=30
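#
# Example (illustrative only): a more conservative profile for deployments that hit
# LLM provider rate limits often - plain exponential backoff with longer waits between
# attempts. The values are assumptions to tune for your provider's quota.
# SURREAL_COMMANDS_RETRY_WAIT_STRATEGY=exponential
# SURREAL_COMMANDS_RETRY_WAIT_MIN=5
# SURREAL_COMMANDS_RETRY_WAIT_MAX=120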
# WORKER CONCURRENCY
# Maximum number of concurrent tasks in the worker pool (default: 5)
# This affects the likelihood of database transaction conflicts during batch operations
#
# Tuning guidelines based on deployment size:
# - Resource-constrained (low CPU/memory): 1-2 workers
# Reduces conflicts and resource usage, but slower processing
#
# - Normal deployment (balanced): 5 workers (RECOMMENDED)
# Good balance between throughput and conflict rate
# Retry logic handles occasional conflicts gracefully
#
# - Large instances (high CPU/memory): 10-20 workers
# Higher throughput but more frequent DB conflicts
# Relies heavily on retry logic with jittered backoff
#
# Note: Higher concurrency increases vectorization speed but also increases
# SurrealDB transaction conflicts. The retry logic with exponential-jitter
# backoff ensures operations complete successfully even at high concurrency.
SURREAL_COMMANDS_MAX_TASKS=5
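#
# Example (illustrative only): a low-resource profile for small VPS/NAS deployments -
# fewer concurrent workers means fewer transaction conflicts at the cost of slower
# batch processing.
# SURREAL_COMMANDS_MAX_TASKS=2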
# FIRECRAWL - Get a key at https://firecrawl.dev/
FIRECRAWL_API_KEY=
# JINA - Get a key at https://jina.ai/
JINA_API_KEY=