# ==============================================================================
# || LLM API Key Proxy - Environment Variable Configuration ||
# ==============================================================================
#
# This file provides an example configuration for the proxy server.
# Copy this file to a new file named '.env' in the same directory
# and replace the placeholder values with your actual credentials and settings.
#

# ------------------------------------------------------------------------------
# | [REQUIRED] Proxy Server Settings |
# ------------------------------------------------------------------------------

# A secret key used to authenticate requests to THIS proxy server.
# This can be any string. Your client application must send this key in the
# 'Authorization' header as a Bearer token (e.g., "Authorization: Bearer YOUR_PROXY_API_KEY").
PROXY_API_KEY="YOUR_PROXY_API_KEY"


# ------------------------------------------------------------------------------
# | [API KEYS] Provider API Keys |
# ------------------------------------------------------------------------------
#
# The proxy automatically discovers API keys from environment variables.
# To add multiple keys for a single provider, increment the number at the end
# of the variable name (e.g., GEMINI_API_KEY_1, GEMINI_API_KEY_2).
#
# The provider name is derived from the part of the variable name before "_API_KEY".
# For example, 'GEMINI_API_KEY_1' configures the 'gemini' provider.
#

# --- Google Gemini ---
GEMINI_API_KEY_1="YOUR_GEMINI_API_KEY_1"
GEMINI_API_KEY_2="YOUR_GEMINI_API_KEY_2"

# --- OpenAI / Azure OpenAI ---
# For Azure, ensure your key has access to the desired models.
OPENAI_API_KEY_1="YOUR_OPENAI_OR_AZURE_API_KEY"

# --- Anthropic (Claude) ---
ANTHROPIC_API_KEY_1="YOUR_ANTHROPIC_API_KEY"

# --- OpenRouter ---
OPENROUTER_API_KEY_1="YOUR_OPENROUTER_API_KEY"

# --- Groq ---
GROQ_API_KEY_1="YOUR_GROQ_API_KEY"

# --- Mistral AI ---
MISTRAL_API_KEY_1="YOUR_MISTRAL_API_KEY"

# --- NVIDIA NIM ---
NVIDIA_API_KEY_1="YOUR_NVIDIA_API_KEY"

# --- Co:here ---
COHERE_API_KEY_1="YOUR_COHERE_API_KEY"

# --- AWS Bedrock ---
# Note: Bedrock authentication is typically handled via AWS IAM roles or
# environment variables like AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
# Only set this if you are using a specific API key for Bedrock.
BEDROCK_API_KEY_1=""

# --- Chutes ---
CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"


# ------------------------------------------------------------------------------
# | [OAUTH] Provider OAuth 2.0 Credentials |
# ------------------------------------------------------------------------------
#
# The proxy now uses a "local-first" approach for OAuth credentials.
# All OAuth credentials are managed within the 'oauth_creds/' directory.
#
# HOW IT WORKS:
# 1. On the first run, if you provide a path to an existing credential file
#    (e.g., from ~/.gemini/), the proxy will COPY it into the local
#    'oauth_creds/' directory with a standardized name (e.g., 'gemini_cli_oauth_1.json').
# 2. On all subsequent runs, the proxy will ONLY use the files found inside
#    'oauth_creds/'. It will no longer scan system-wide directories.
# 3. To add a new account, either use the '--add-credential' tool or manually
#    place a new, valid credential file in the 'oauth_creds/' directory.
#
# Use the variables below for the ONE-TIME setup to import existing credentials.
# After the first successful run, you can clear these paths.
#

# --- Google Gemini (gcloud CLI) ---
# Path to your gcloud ADC file (e.g., ~/.config/gcloud/application_default_credentials.json)
# or a credential file from the official 'gemini' CLI (e.g., ~/.gemini/credentials.json).
GEMINI_CLI_OAUTH_1=""

# --- Qwen / Dashscope (Code Companion) ---
# Path to your Qwen credential file (e.g., ~/.qwen/oauth_creds.json).
QWEN_CODE_OAUTH_1=""

# --- iFlow ---
# Path to your iFlow credential file (e.g., ~/.iflow/oauth_creds.json).
IFLOW_OAUTH_1=""


# ------------------------------------------------------------------------------
# | [ADVANCED] Provider-Specific Settings |
# ------------------------------------------------------------------------------

# --- Gemini CLI Project ID ---
# Required if you are using the Gemini CLI OAuth provider and the proxy
# cannot automatically determine your Google Cloud Project ID.
GEMINI_CLI_PROJECT_ID=""

# --- Model Ignore Lists ---
# Specify a comma-separated list of model names to exclude from a provider's
# available models. This is useful for filtering out models you don't want to use.
#
# Format: IGNORE_MODELS_<PROVIDER>="model-1,model-2,model-3"
#
# Example:
# IGNORE_MODELS_GEMINI="gemini-1.0-pro-vision-latest,gemini-1.0-pro-latest"
# IGNORE_MODELS_OPENAI="gpt-4-turbo,gpt-3.5-turbo-instruct"
IGNORE_MODELS_GEMINI=""
IGNORE_MODELS_OPENAI=""

# --- Model Whitelists (Override Ignore Lists) ---
# Specify a comma-separated list of model names to ALWAYS include from a
# provider's list. This acts as an override for the ignore list.
#
# HOW IT WORKS:
# 1. A model on a whitelist will ALWAYS be available, even if it's also on an
#    ignore list (or if the ignore list is set to "*").
# 2. For any models NOT on the whitelist, the standard ignore list logic applies.
#
# This allows for two main use cases:
# - "Pure Whitelist" Mode: Set IGNORE_MODELS_<PROVIDER>="*" and then specify
#   only the models you want in WHITELIST_MODELS_<PROVIDER>.
# - "Exemption" Mode: Ignore a broad range of models (e.g., "*-preview*")
#   and then use the whitelist to exempt specific preview models you want to test.
#
# Format: WHITELIST_MODELS_<PROVIDER>="model-1,model-2"
#
# Example of a pure whitelist for Gemini:
# IGNORE_MODELS_GEMINI="*"
# WHITELIST_MODELS_GEMINI="gemini-1.5-pro-latest,gemini-1.5-flash-latest"
WHITELIST_MODELS_GEMINI=""
WHITELIST_MODELS_OPENAI=""

# --- Maximum Concurrent Requests Per Key ---
# Controls how many concurrent requests for the SAME model can use the SAME key.
# This is useful for providers that can handle concurrent requests without rate limiting.
# Default is 1 (no concurrency, current behavior).
#
# Format: MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER>=<number>
#
# Example:
# MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=3  # Allow 3 concurrent requests per OpenAI key
# MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1  # Allow only 1 request per Gemini key (default)
#
MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=1
MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1

# --- Credential Rotation Mode ---
# Controls how credentials are rotated when multiple are available for a provider.
# This affects how the proxy selects the next credential to use for requests.
#
# Available modes:
#   balanced   - (Default) Rotate credentials evenly across requests to distribute load.
#                Best for API keys with per-minute rate limits.
#   sequential - Use one credential until it's exhausted (429 error), then switch to next.
#                Best for credentials with daily/weekly quotas (e.g., free tier accounts).
#                When a credential hits quota, it's put on cooldown based on the reset time
#                parsed from the provider's error response.
#
# Format: ROTATION_MODE_<PROVIDER>=<mode>
#
# Provider Defaults:
#   - antigravity: sequential (free tier accounts with daily quotas)
#   - All others: balanced
#
# Example:
# ROTATION_MODE_GEMINI=sequential       # Use Gemini keys until quota exhausted
# ROTATION_MODE_OPENAI=balanced         # Distribute load across OpenAI keys (default)
# ROTATION_MODE_ANTIGRAVITY=balanced    # Override Antigravity's sequential default
#
# ROTATION_MODE_GEMINI=balanced
# ROTATION_MODE_ANTIGRAVITY=sequential

# --- Priority-Based Concurrency Multipliers ---
# Credentials can be assigned to priority tiers (1=highest, 2, 3, etc.).
# Each tier can have a concurrency multiplier that increases the effective
# concurrent request limit for credentials in that tier.
#
# How it works:
#   effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY * tier_multiplier
#
# This allows paid/premium credentials to handle more concurrent requests than
# free tier credentials, regardless of rotation mode.
#
# Provider Defaults (built into provider classes):
#   Antigravity:
#     Priority 1: 5x (paid ultra tier)
#     Priority 2: 3x (standard paid tier)
#     Priority 3+: 2x (sequential mode) or 1x (balanced mode)
#   Gemini CLI:
#     Priority 1: 5x
#     Priority 2: 3x
#     Others: 1x (all modes)
#
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
#
# Mode-specific overrides (optional):
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
#
# Examples:
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10           # Override P1 to 10x
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1            # Override P3 to 1x
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1   # P2 = 1x in balanced mode only

# --- Model Quota Groups ---
# Models that share quota/cooldown timing. When one model in a group hits
# quota exhausted (429), all models in the group receive the same cooldown timestamp.
# They also reset (archive stats) together when the quota period expires.
#
# This is useful for providers where multiple model variants share the same
# underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
#
# Format: QUOTA_GROUPS_<PROVIDER>_<GROUP>="model1,model2,model3"
#
# To DISABLE a default group, set it to empty string:
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
#
# Default groups:
#   ANTIGRAVITY.CLAUDE: claude-sonnet-4-5,claude-opus-4-5
#
# Examples:
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
# QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"


# ------------------------------------------------------------------------------
# | [ADVANCED] Proxy Configuration |
# ------------------------------------------------------------------------------

# --- OAuth Refresh Interval ---
# How often, in seconds, the background refresher should check and refresh
# expired OAuth tokens. Default is 600 seconds (10 minutes).
OAUTH_REFRESH_INTERVAL=600

# --- Skip OAuth Initialization ---
# Set to "true" to prevent the proxy from performing the interactive OAuth
# setup/validation flow on startup. This is highly recommended for non-interactive
# environments like Docker containers or automated scripts.
# Ensure your credentials in 'oauth_creds/' are valid before enabling this.
SKIP_OAUTH_INIT_CHECK=false