Spaces:
Paused
Paused
Mirrowel
feat(concurrency): ✨ add priority-based concurrency multipliers for credential tiers
aefb706
# ==============================================================================
# || LLM API Key Proxy - Environment Variable Configuration ||
# ==============================================================================
#
# This file provides an example configuration for the proxy server.
# Copy this file to a new file named '.env' in the same directory
# and replace the placeholder values with your actual credentials and settings.
#
# ------------------------------------------------------------------------------
# | [REQUIRED] Proxy Server Settings |
# ------------------------------------------------------------------------------
# A secret key used to authenticate requests to THIS proxy server.
# This can be any string. Your client application must send this key in the
# 'Authorization' header as a Bearer token (e.g., "Authorization: Bearer YOUR_PROXY_API_KEY").
PROXY_API_KEY="YOUR_PROXY_API_KEY"
# ------------------------------------------------------------------------------
# | [API KEYS] Provider API Keys |
# ------------------------------------------------------------------------------
#
# The proxy automatically discovers API keys from environment variables.
# To add multiple keys for a single provider, increment the number at the end
# of the variable name (e.g., GEMINI_API_KEY_1, GEMINI_API_KEY_2).
#
# The provider name is derived from the part of the variable name before "_API_KEY".
# For example, 'GEMINI_API_KEY_1' configures the 'gemini' provider.
#
# --- Google Gemini ---
GEMINI_API_KEY_1="YOUR_GEMINI_API_KEY_1"
GEMINI_API_KEY_2="YOUR_GEMINI_API_KEY_2"
# --- OpenAI / Azure OpenAI ---
# For Azure, ensure your key has access to the desired models.
OPENAI_API_KEY_1="YOUR_OPENAI_OR_AZURE_API_KEY"
# --- Anthropic (Claude) ---
ANTHROPIC_API_KEY_1="YOUR_ANTHROPIC_API_KEY"
# --- OpenRouter ---
OPENROUTER_API_KEY_1="YOUR_OPENROUTER_API_KEY"
# --- Groq ---
GROQ_API_KEY_1="YOUR_GROQ_API_KEY"
# --- Mistral AI ---
MISTRAL_API_KEY_1="YOUR_MISTRAL_API_KEY"
# --- NVIDIA NIM ---
NVIDIA_API_KEY_1="YOUR_NVIDIA_API_KEY"
# --- Co:here ---
COHERE_API_KEY_1="YOUR_COHERE_API_KEY"
# --- AWS Bedrock ---
# Note: Bedrock authentication is typically handled via AWS IAM roles or
# environment variables like AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
# Only set this if you are using a specific API key for Bedrock.
BEDROCK_API_KEY_1=""
# --- Chutes ---
CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"
# ------------------------------------------------------------------------------
# | [OAUTH] Provider OAuth 2.0 Credentials |
# ------------------------------------------------------------------------------
#
# The proxy now uses a "local-first" approach for OAuth credentials.
# All OAuth credentials are managed within the 'oauth_creds/' directory.
#
# HOW IT WORKS:
# 1. On the first run, if you provide a path to an existing credential file
#    (e.g., from ~/.gemini/), the proxy will COPY it into the local
#    'oauth_creds/' directory with a standardized name (e.g., 'gemini_cli_oauth_1.json').
# 2. On all subsequent runs, the proxy will ONLY use the files found inside
#    'oauth_creds/'. It will no longer scan system-wide directories.
# 3. To add a new account, either use the '--add-credential' tool or manually
#    place a new, valid credential file in the 'oauth_creds/' directory.
#
# Use the variables below for the ONE-TIME setup to import existing credentials.
# After the first successful run, you can clear these paths.
#
# --- Google Gemini (gcloud CLI) ---
# Path to your gcloud ADC file (e.g., ~/.config/gcloud/application_default_credentials.json)
# or a credential file from the official 'gemini' CLI (e.g., ~/.gemini/credentials.json).
GEMINI_CLI_OAUTH_1=""
# --- Qwen / Dashscope (Code Companion) ---
# Path to your Qwen credential file (e.g., ~/.qwen/oauth_creds.json).
QWEN_CODE_OAUTH_1=""
# --- iFlow ---
# Path to your iFlow credential file (e.g., ~/.iflow/oauth_creds.json).
IFLOW_OAUTH_1=""
# ------------------------------------------------------------------------------
# | [ADVANCED] Provider-Specific Settings |
# ------------------------------------------------------------------------------
# --- Gemini CLI Project ID ---
# Required if you are using the Gemini CLI OAuth provider and the proxy
# cannot automatically determine your Google Cloud Project ID.
GEMINI_CLI_PROJECT_ID=""
# --- Model Ignore Lists ---
# Specify a comma-separated list of model names to exclude from a provider's
# available models. This is useful for filtering out models you don't want to use.
#
# Format: IGNORE_MODELS_<PROVIDER_NAME>="model-1,model-2,model-3"
#
# Example:
# IGNORE_MODELS_GEMINI="gemini-1.0-pro-vision-latest,gemini-1.0-pro-latest"
# IGNORE_MODELS_OPENAI="gpt-4-turbo,gpt-3.5-turbo-instruct"
IGNORE_MODELS_GEMINI=""
IGNORE_MODELS_OPENAI=""
# --- Model Whitelists (Overrides Blacklists) ---
# Specify a comma-separated list of model names to ALWAYS include in a
# provider's list. This acts as an override for the ignore list.
#
# HOW IT WORKS:
# 1. A model on a whitelist will ALWAYS be available, even if it's also on an
#    ignore list (or if the ignore list is set to "*").
# 2. For any models NOT on the whitelist, the standard ignore list logic applies.
#
# This allows for two main use cases:
# - "Pure Whitelist" Mode: Set IGNORE_MODELS_<PROVIDER>="*" and then specify
#   only the models you want in WHITELIST_MODELS_<PROVIDER>.
# - "Exemption" Mode: Blacklist a broad range of models (e.g., "*-preview*")
#   and then use the whitelist to exempt specific preview models you want to test.
#
# Format: WHITELIST_MODELS_<PROVIDER_NAME>="model-1,model-2"
#
# Example of a pure whitelist for Gemini:
# IGNORE_MODELS_GEMINI="*"
# WHITELIST_MODELS_GEMINI="gemini-1.5-pro-latest,gemini-1.5-flash-latest"
WHITELIST_MODELS_GEMINI=""
WHITELIST_MODELS_OPENAI=""
# --- Maximum Concurrent Requests Per Key ---
# Controls how many concurrent requests for the SAME model can use the SAME key.
# This is useful for providers that can handle concurrent requests without rate limiting.
# Default is 1 (no concurrency, current behavior).
#
# Format: MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER_NAME>=<number>
#
# Example:
# MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=3  # Allow 3 concurrent requests per OpenAI key
# MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1  # Allow only 1 request per Gemini key (default)
#
MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=1
MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
# --- Credential Rotation Mode ---
# Controls how credentials are rotated when multiple are available for a provider.
# This affects how the proxy selects the next credential to use for requests.
#
# Available modes:
#   balanced   - (Default) Rotate credentials evenly across requests to distribute load.
#                Best for API keys with per-minute rate limits.
#   sequential - Use one credential until it's exhausted (429 error), then switch to next.
#                Best for credentials with daily/weekly quotas (e.g., free tier accounts).
#                When a credential hits quota, it's put on cooldown based on the reset time
#                parsed from the provider's error response.
#
# Format: ROTATION_MODE_<PROVIDER_NAME>=<mode>
#
# Provider Defaults:
#   - antigravity: sequential (free tier accounts with daily quotas)
#   - All others: balanced
#
# Example:
# ROTATION_MODE_GEMINI=sequential  # Use Gemini keys until quota exhausted
# ROTATION_MODE_OPENAI=balanced  # Distribute load across OpenAI keys (default)
# ROTATION_MODE_ANTIGRAVITY=balanced  # Override Antigravity's sequential default
#
# ROTATION_MODE_GEMINI=balanced
# ROTATION_MODE_ANTIGRAVITY=sequential
# --- Priority-Based Concurrency Multipliers ---
# Credentials can be assigned to priority tiers (1=highest, 2, 3, etc.).
# Each tier can have a concurrency multiplier that increases the effective
# concurrent request limit for credentials in that tier.
#
# How it works:
#   effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY * tier_multiplier
#
# This allows paid/premium credentials to handle more concurrent requests than
# free tier credentials, regardless of rotation mode.
#
# Provider Defaults (built into provider classes):
#   Antigravity:
#     Priority 1: 5x (paid ultra tier)
#     Priority 2: 3x (standard paid tier)
#     Priority 3+: 2x (sequential mode) or 1x (balanced mode)
#   Gemini CLI:
#     Priority 1: 5x
#     Priority 2: 3x
#   Others: 1x (all modes)
#
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
#
# Mode-specific overrides (optional):
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
#
# Examples:
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10  # Override P1 to 10x
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1  # Override P3 to 1x
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1  # P2 = 1x in balanced mode only
# --- Model Quota Groups ---
# Models that share quota/cooldown timing. When one model in a group hits
# quota exhaustion (429), all models in the group receive the same cooldown timestamp.
# They also reset (archive stats) together when the quota period expires.
#
# This is useful for providers where multiple model variants share the same
# underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
#
# Format: QUOTA_GROUPS_<PROVIDER>_<GROUP>="model1,model2,model3"
#
# To DISABLE a default group, set it to an empty string:
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
#
# Default groups:
#   ANTIGRAVITY.CLAUDE: claude-sonnet-4-5,claude-opus-4-5
#
# Examples:
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
# QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
# ------------------------------------------------------------------------------
# | [ADVANCED] Proxy Configuration |
# ------------------------------------------------------------------------------
# --- OAuth Refresh Interval ---
# How often, in seconds, the background refresher should check and refresh
# expired OAuth tokens.
# Default is 600 seconds (10 minutes).
OAUTH_REFRESH_INTERVAL=600
# --- Skip OAuth Initialization ---
# Set to "true" to prevent the proxy from performing the interactive OAuth
# setup/validation flow on startup. This is highly recommended for non-interactive
# environments like Docker containers or automated scripts.
# Ensure your credentials in 'oauth_creds/' are valid before enabling this.
SKIP_OAUTH_INIT_CHECK=false