Spaces:
Paused
Paused
Mirrowel
feat(concurrency): ✨ add priority-based concurrency multipliers for credential tiers
aefb706
# ==============================================================================
# || LLM API Key Proxy - Environment Variable Configuration ||
# ==============================================================================
#
# This file provides an example configuration for the proxy server.
# Copy this file to a new file named '.env' in the same directory
# and replace the placeholder values with your actual credentials and settings.
#
# ------------------------------------------------------------------------------
# | [REQUIRED] Proxy Server Settings |
# ------------------------------------------------------------------------------
# A secret key used to authenticate requests to THIS proxy server.
# This can be any string. Your client application must send this key in the
# 'Authorization' header as a Bearer token (e.g., "Authorization: Bearer YOUR_PROXY_API_KEY").
PROXY_API_KEY="YOUR_PROXY_API_KEY"
# ------------------------------------------------------------------------------
# | [API KEYS] Provider API Keys |
# ------------------------------------------------------------------------------
#
# The proxy automatically discovers API keys from environment variables.
# To add multiple keys for a single provider, increment the number at the end
# of the variable name (e.g., GEMINI_API_KEY_1, GEMINI_API_KEY_2).
#
# The provider name is derived from the part of the variable name before "_API_KEY".
# For example, 'GEMINI_API_KEY_1' configures the 'gemini' provider.
#
# --- Google Gemini ---
GEMINI_API_KEY_1="YOUR_GEMINI_API_KEY_1"
GEMINI_API_KEY_2="YOUR_GEMINI_API_KEY_2"
# --- OpenAI / Azure OpenAI ---
# For Azure, ensure your key has access to the desired models.
OPENAI_API_KEY_1="YOUR_OPENAI_OR_AZURE_API_KEY"
# --- Anthropic (Claude) ---
ANTHROPIC_API_KEY_1="YOUR_ANTHROPIC_API_KEY"
# --- OpenRouter ---
OPENROUTER_API_KEY_1="YOUR_OPENROUTER_API_KEY"
# --- Groq ---
GROQ_API_KEY_1="YOUR_GROQ_API_KEY"
# --- Mistral AI ---
MISTRAL_API_KEY_1="YOUR_MISTRAL_API_KEY"
# --- NVIDIA NIM ---
NVIDIA_API_KEY_1="YOUR_NVIDIA_API_KEY"
# --- Co:here ---
COHERE_API_KEY_1="YOUR_COHERE_API_KEY"
# --- AWS Bedrock ---
# Note: Bedrock authentication is typically handled via AWS IAM roles or
# environment variables like AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
# Only set this if you are using a specific API key for Bedrock.
BEDROCK_API_KEY_1=""
# --- Chutes ---
CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"
# ------------------------------------------------------------------------------
# | [OAUTH] Provider OAuth 2.0 Credentials |
# ------------------------------------------------------------------------------
#
# The proxy now uses a "local-first" approach for OAuth credentials.
# All OAuth credentials are managed within the 'oauth_creds/' directory.
#
# HOW IT WORKS:
# 1. On the first run, if you provide a path to an existing credential file
#    (e.g., from ~/.gemini/), the proxy will COPY it into the local
#    'oauth_creds/' directory with a standardized name (e.g., 'gemini_cli_oauth_1.json').
# 2. On all subsequent runs, the proxy will ONLY use the files found inside
#    'oauth_creds/'. It will no longer scan system-wide directories.
# 3. To add a new account, either use the '--add-credential' tool or manually
#    place a new, valid credential file in the 'oauth_creds/' directory.
#
# Use the variables below for the ONE-TIME setup to import existing credentials.
# After the first successful run, you can clear these paths.
#
# --- Google Gemini (gcloud CLI) ---
# Path to your gcloud ADC file (e.g., ~/.config/gcloud/application_default_credentials.json)
# or a credential file from the official 'gemini' CLI (e.g., ~/.gemini/credentials.json).
GEMINI_CLI_OAUTH_1=""
# --- Qwen / Dashscope (Code Companion) ---
# Path to your Qwen credential file (e.g., ~/.qwen/oauth_creds.json).
QWEN_CODE_OAUTH_1=""
# --- iFlow ---
# Path to your iFlow credential file (e.g., ~/.iflow/oauth_creds.json).
IFLOW_OAUTH_1=""
# ------------------------------------------------------------------------------
# | [ADVANCED] Provider-Specific Settings |
# ------------------------------------------------------------------------------
# --- Gemini CLI Project ID ---
# Required if you are using the Gemini CLI OAuth provider and the proxy
# cannot automatically determine your Google Cloud Project ID.
GEMINI_CLI_PROJECT_ID=""
# --- Model Ignore Lists ---
# Specify a comma-separated list of model names to exclude from a provider's
# available models. This is useful for filtering out models you don't want to use.
#
# Format: IGNORE_MODELS_<PROVIDER_NAME>="model-1,model-2,model-3"
#
# Example:
# IGNORE_MODELS_GEMINI="gemini-1.0-pro-vision-latest,gemini-1.0-pro-latest"
# IGNORE_MODELS_OPENAI="gpt-4-turbo,gpt-3.5-turbo-instruct"
IGNORE_MODELS_GEMINI=""
IGNORE_MODELS_OPENAI=""
# --- Model Whitelists (Overrides Blacklists) ---
# Specify a comma-separated list of model names to ALWAYS include in a
# provider's list. This acts as an override for the ignore list.
#
# HOW IT WORKS:
# 1. A model on a whitelist will ALWAYS be available, even if it's also on an
#    ignore list (or if the ignore list is set to "*").
# 2. For any models NOT on the whitelist, the standard ignore list logic applies.
#
# This allows for two main use cases:
# - "Pure Whitelist" Mode: Set IGNORE_MODELS_<PROVIDER>="*" and then specify
#   only the models you want in WHITELIST_MODELS_<PROVIDER>.
# - "Exemption" Mode: Blacklist a broad range of models (e.g., "*-preview*")
#   and then use the whitelist to exempt specific preview models you want to test.
#
# Format: WHITELIST_MODELS_<PROVIDER_NAME>="model-1,model-2"
#
# Example of a pure whitelist for Gemini:
# IGNORE_MODELS_GEMINI="*"
# WHITELIST_MODELS_GEMINI="gemini-1.5-pro-latest,gemini-1.5-flash-latest"
WHITELIST_MODELS_GEMINI=""
WHITELIST_MODELS_OPENAI=""
# --- Maximum Concurrent Requests Per Key ---
# Controls how many concurrent requests for the SAME model can use the SAME key.
# This is useful for providers that can handle concurrent requests without rate limiting.
# Default is 1 (no concurrency, current behavior).
#
# Format: MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER_NAME>=<number>
#
# Example:
# MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=3  # Allow 3 concurrent requests per OpenAI key
# MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1  # Allow only 1 request per Gemini key (default)
#
MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=1
MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
# --- Credential Rotation Mode ---
# Controls how credentials are rotated when multiple are available for a provider.
# This affects how the proxy selects the next credential to use for requests.
#
# Available modes:
#   balanced   - (Default) Rotate credentials evenly across requests to distribute load.
#                Best for API keys with per-minute rate limits.
#   sequential - Use one credential until it's exhausted (429 error), then switch to next.
#                Best for credentials with daily/weekly quotas (e.g., free tier accounts).
#                When a credential hits quota, it's put on cooldown based on the reset time
#                parsed from the provider's error response.
#
# Format: ROTATION_MODE_<PROVIDER_NAME>=<mode>
#
# Provider Defaults:
#   - antigravity: sequential (free tier accounts with daily quotas)
#   - All others: balanced
#
# Example:
# ROTATION_MODE_GEMINI=sequential  # Use Gemini keys until quota exhausted
# ROTATION_MODE_OPENAI=balanced  # Distribute load across OpenAI keys (default)
# ROTATION_MODE_ANTIGRAVITY=balanced  # Override Antigravity's sequential default
#
# ROTATION_MODE_GEMINI=balanced
# ROTATION_MODE_ANTIGRAVITY=sequential
# --- Priority-Based Concurrency Multipliers ---
# Credentials can be assigned to priority tiers (1=highest, 2, 3, etc.).
# Each tier can have a concurrency multiplier that increases the effective
# concurrent request limit for credentials in that tier.
#
# How it works:
#   effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY * tier_multiplier
#
# This allows paid/premium credentials to handle more concurrent requests than
# free tier credentials, regardless of rotation mode.
#
# Provider Defaults (built into provider classes):
#   Antigravity:
#     Priority 1: 5x (paid ultra tier)
#     Priority 2: 3x (standard paid tier)
#     Priority 3+: 2x (sequential mode) or 1x (balanced mode)
#   Gemini CLI:
#     Priority 1: 5x
#     Priority 2: 3x
#   Others: 1x (all modes)
#
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
#
# Mode-specific overrides (optional):
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
#
# Examples:
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10  # Override P1 to 10x
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1  # Override P3 to 1x
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1  # P2 = 1x in balanced mode only
# --- Model Quota Groups ---
# Models that share quota/cooldown timing. When one model in a group hits
# quota exhaustion (429), all models in the group receive the same cooldown timestamp.
# They also reset (archive stats) together when the quota period expires.
#
# This is useful for providers where multiple model variants share the same
# underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
#
# Format: QUOTA_GROUPS_<PROVIDER>_<GROUP>="model1,model2,model3"
#
# To DISABLE a default group, set it to an empty string:
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
#
# Default groups:
#   ANTIGRAVITY.CLAUDE: claude-sonnet-4-5,claude-opus-4-5
#
# Examples:
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
# QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
# ------------------------------------------------------------------------------
# | [ADVANCED] Proxy Configuration |
# ------------------------------------------------------------------------------
# --- OAuth Refresh Interval ---
# How often, in seconds, the background refresher should check and refresh
# expired OAuth tokens.
# Default is 600 seconds (10 minutes).
OAUTH_REFRESH_INTERVAL=600
# --- Skip OAuth Initialization ---
# Set to "true" to prevent the proxy from performing the interactive OAuth
# setup/validation flow on startup. This is highly recommended for non-interactive
# environments like Docker containers or automated scripts.
# Ensure your credentials in 'oauth_creds/' are valid before enabling this.
SKIP_OAUTH_INIT_CHECK=false