# llm-api-proxy / .env.example
# Mirrowel
# feat(concurrency): ✨ add priority-based concurrency multipliers for credential tiers
# aefb706
# ==============================================================================
# || LLM API Key Proxy - Environment Variable Configuration ||
# ==============================================================================
#
# This file provides an example configuration for the proxy server.
# Copy this file to a new file named '.env' in the same directory
# and replace the placeholder values with your actual credentials and settings.
#
# ------------------------------------------------------------------------------
# | [REQUIRED] Proxy Server Settings |
# ------------------------------------------------------------------------------
# A secret key used to authenticate requests to THIS proxy server (it is not
# sent to any upstream provider section below). This can be any string.
# Your client application must send this key in the 'Authorization' header as
# a Bearer token (e.g., "Authorization: Bearer YOUR_PROXY_API_KEY").
# Replace the placeholder below with your own value.
PROXY_API_KEY="YOUR_PROXY_API_KEY"
# ------------------------------------------------------------------------------
# | [API KEYS] Provider API Keys |
# ------------------------------------------------------------------------------
#
# The proxy automatically discovers API keys from environment variables.
# To add multiple keys for a single provider, increment the number at the end
# of the variable name (e.g., GEMINI_API_KEY_1, GEMINI_API_KEY_2).
#
# The provider name is derived from the part of the variable name before "_API_KEY".
# For example, 'GEMINI_API_KEY_1' configures the 'gemini' provider.
#
# --- Google Gemini ---
# Two entries are shown to illustrate the numbered-suffix scheme for multiple keys.
GEMINI_API_KEY_1="YOUR_GEMINI_API_KEY_1"
GEMINI_API_KEY_2="YOUR_GEMINI_API_KEY_2"
# --- OpenAI / Azure OpenAI ---
# For Azure, ensure your key has access to the desired models.
OPENAI_API_KEY_1="YOUR_OPENAI_OR_AZURE_API_KEY"
# --- Anthropic (Claude) ---
ANTHROPIC_API_KEY_1="YOUR_ANTHROPIC_API_KEY"
# --- OpenRouter ---
OPENROUTER_API_KEY_1="YOUR_OPENROUTER_API_KEY"
# --- Groq ---
GROQ_API_KEY_1="YOUR_GROQ_API_KEY"
# --- Mistral AI ---
MISTRAL_API_KEY_1="YOUR_MISTRAL_API_KEY"
# --- NVIDIA NIM ---
NVIDIA_API_KEY_1="YOUR_NVIDIA_API_KEY"
# --- Cohere ---
COHERE_API_KEY_1="YOUR_COHERE_API_KEY"
# --- AWS Bedrock ---
# Note: Bedrock authentication is typically handled via AWS IAM roles or
# environment variables like AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
# Only set this if you are using a specific API key for Bedrock; otherwise
# leave it empty, as shown.
BEDROCK_API_KEY_1=""
# --- Chutes ---
CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"
# ------------------------------------------------------------------------------
# | [OAUTH] Provider OAuth 2.0 Credentials |
# ------------------------------------------------------------------------------
#
# The proxy now uses a "local-first" approach for OAuth credentials.
# All OAuth credentials are managed within the 'oauth_creds/' directory.
#
# HOW IT WORKS:
# 1. On the first run, if you provide a path to an existing credential file
# (e.g., from ~/.gemini/), the proxy will COPY it into the local
# 'oauth_creds/' directory with a standardized name (e.g., 'gemini_cli_oauth_1.json').
# 2. On all subsequent runs, the proxy will ONLY use the files found inside
# 'oauth_creds/'. It will no longer scan system-wide directories.
# 3. To add a new account, either use the '--add-credential' tool or manually
# place a new, valid credential file in the 'oauth_creds/' directory.
#
# Use the variables below for the ONE-TIME setup to import existing credentials.
# After the first successful run, you can clear these paths.
#
# --- Google Gemini (gcloud CLI) ---
# Path to your gcloud ADC file (e.g., ~/.config/gcloud/application_default_credentials.json)
# or a credential file from the official 'gemini' CLI (e.g., ~/.gemini/credentials.json).
# Leave empty if you have no existing credential file to import.
GEMINI_CLI_OAUTH_1=""
# --- Qwen / Dashscope (Code Companion) ---
# Path to your Qwen credential file (e.g., ~/.qwen/oauth_creds.json).
# Leave empty if you have no existing credential file to import.
QWEN_CODE_OAUTH_1=""
# --- iFlow ---
# Path to your iFlow credential file (e.g., ~/.iflow/oauth_creds.json).
# Leave empty if you have no existing credential file to import.
IFLOW_OAUTH_1=""
# ------------------------------------------------------------------------------
# | [ADVANCED] Provider-Specific Settings |
# ------------------------------------------------------------------------------
# --- Gemini CLI Project ID ---
# Required only if you are using the Gemini CLI OAuth provider and the proxy
# cannot automatically determine your Google Cloud Project ID.
# Leave empty to rely on automatic detection.
GEMINI_CLI_PROJECT_ID=""
# --- Model Ignore Lists ---
# Specify a comma-separated list of model names to exclude from a provider's
# available models. This is useful for filtering out models you don't want to use.
# A value of "*" ignores every model of that provider (typically combined with
# WHITELIST_MODELS_<PROVIDER_NAME> to re-enable selected models).
#
# Format: IGNORE_MODELS_<PROVIDER_NAME>="model-1,model-2,model-3"
#
# Examples:
# IGNORE_MODELS_GEMINI="gemini-1.0-pro-vision-latest,gemini-1.0-pro-latest"
# IGNORE_MODELS_OPENAI="gpt-4-turbo,gpt-3.5-turbo-instruct"
#
# The empty values below list no models.
IGNORE_MODELS_GEMINI=""
IGNORE_MODELS_OPENAI=""
# --- Model Whitelists (Override Ignore Lists) ---
# Specify a comma-separated list of model names to ALWAYS include from a
# provider's list. This acts as an override for the ignore list
# (IGNORE_MODELS_<PROVIDER_NAME>).
#
# HOW IT WORKS:
# 1. A model on a whitelist will ALWAYS be available, even if it's also on an
# ignore list (or if the ignore list is set to "*").
# 2. For any models NOT on the whitelist, the standard ignore list logic applies.
#
# This allows for two main use cases:
# - "Pure Whitelist" Mode: Set IGNORE_MODELS_<PROVIDER>="*" and then specify
# only the models you want in WHITELIST_MODELS_<PROVIDER>.
# - "Exemption" Mode: Ignore a broad range of models (e.g., "*-preview*")
# and then use the whitelist to exempt specific preview models you want to test.
#
# Format: WHITELIST_MODELS_<PROVIDER_NAME>="model-1,model-2"
#
# Example of a pure whitelist for Gemini:
# IGNORE_MODELS_GEMINI="*"
# WHITELIST_MODELS_GEMINI="gemini-1.5-pro-latest,gemini-1.5-flash-latest"
#
# The empty values below whitelist no models.
WHITELIST_MODELS_GEMINI=""
WHITELIST_MODELS_OPENAI=""
# --- Maximum Concurrent Requests Per Key ---
# Controls how many concurrent requests for the SAME model can use the SAME key.
# This is useful for providers that can handle concurrent requests without rate limiting.
# Default is 1 (one request at a time per key and model, i.e. no concurrency).
#
# Format: MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER_NAME>=<number>
#
# Examples (notes are kept on their own lines so an example can be uncommented as-is):
# Allow 3 concurrent requests per OpenAI key:
# MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=3
# Allow only 1 request per Gemini key (default):
# MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
#
MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=1
MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
# --- Credential Rotation Mode ---
# Controls how credentials are rotated when multiple are available for a provider.
# This affects how the proxy selects the next credential to use for requests.
#
# Available modes:
# balanced - (Default) Rotate credentials evenly across requests to distribute load.
# Best for API keys with per-minute rate limits.
# sequential - Use one credential until it's exhausted (429 error), then switch to next.
# Best for credentials with daily/weekly quotas (e.g., free tier accounts).
# When a credential hits quota, it's put on cooldown based on the reset time
# parsed from the provider's error response.
#
# Format: ROTATION_MODE_<PROVIDER_NAME>=<mode>
#
# Provider Defaults:
# - antigravity: sequential (free tier accounts with daily quotas)
# - All others: balanced
#
# Examples (notes are kept on their own lines so an example can be uncommented as-is):
# Use Gemini keys until quota exhausted:
# ROTATION_MODE_GEMINI=sequential
# Distribute load across OpenAI keys (default):
# ROTATION_MODE_OPENAI=balanced
# Override Antigravity's sequential default:
# ROTATION_MODE_ANTIGRAVITY=balanced
#
# ROTATION_MODE_GEMINI=balanced
# ROTATION_MODE_ANTIGRAVITY=sequential
# --- Priority-Based Concurrency Multipliers ---
# Credentials can be assigned to priority tiers (1=highest, 2, 3, etc.).
# Each tier can have a concurrency multiplier that increases the effective
# concurrent request limit for credentials in that tier.
#
# How it works:
# effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER> * tier_multiplier
#
# This allows paid/premium credentials to handle more concurrent requests than
# free tier credentials, regardless of rotation mode.
#
# Provider Defaults (built into provider classes):
# Antigravity:
# Priority 1: 5x (paid ultra tier)
# Priority 2: 3x (standard paid tier)
# Priority 3+: 2x (sequential mode) or 1x (balanced mode)
# Gemini CLI:
# Priority 1: 5x
# Priority 2: 3x
# Others: 1x (all modes)
#
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
#
# Mode-specific overrides (optional):
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
#
# Examples (notes are kept on their own lines so an example can be uncommented as-is):
# Override P1 to 10x:
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10
# Override P3 to 1x:
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1
# P2 = 1x in balanced mode only:
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1
# --- Model Quota Groups ---
# Models that share quota/cooldown timing. When one model in a group exhausts
# its quota (429), all models in the group receive the same cooldown timestamp.
# They also reset (archive stats) together when the quota period expires.
#
# This is useful for providers where multiple model variants share the same
# underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
#
# Format: QUOTA_GROUPS_<PROVIDER>_<GROUP>="model1,model2,model3"
#
# To DISABLE a default group, set it to empty string:
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
#
# Default groups:
# ANTIGRAVITY.CLAUDE: claude-sonnet-4-5,claude-opus-4-5
#
# Examples:
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
# QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
# ------------------------------------------------------------------------------
# | [ADVANCED] Proxy Configuration |
# ------------------------------------------------------------------------------
# --- OAuth Refresh Interval ---
# How often, in seconds, the background refresher should check and refresh
# expired OAuth tokens. Default is 600 seconds (10 minutes).
# NOTE: Keep comments on their own line. Some env-file loaders (e.g. Docker's
# `--env-file`) only recognise '#' at the start of a line and would otherwise
# treat a trailing "# ..." as part of the value.
OAUTH_REFRESH_INTERVAL=600
# --- Skip OAuth Initialization ---
# Set to "true" to prevent the proxy from performing the interactive OAuth
# setup/validation flow on startup. This is highly recommended for non-interactive
# environments like Docker containers or automated scripts.
# Ensure your credentials in 'oauth_creds/' are valid before enabling this.
# The value below ("false") leaves the startup OAuth flow enabled.
SKIP_OAUTH_INIT_CHECK=false