Spaces:
Paused
Paused
Merge branch 'main' into fix/antigravity-credential-stuck-unavailable
Browse files- .env.example +77 -0
- DOCUMENTATION.md +243 -5
- README.md +49 -0
- src/proxy_app/launcher_tui.py +445 -197
- src/proxy_app/main.py +331 -169
- src/proxy_app/settings_tool.py +918 -273
- src/rotator_library/client.py +303 -26
- src/rotator_library/error_handler.py +76 -2
- src/rotator_library/providers/antigravity_provider.py +261 -54
- src/rotator_library/providers/gemini_cli_provider.py +71 -34
- src/rotator_library/providers/provider_interface.py +405 -12
- src/rotator_library/usage_manager.py +1128 -165
.env.example
CHANGED
|
@@ -159,6 +159,83 @@ MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
|
|
| 159 |
MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
|
| 160 |
MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
|
| 161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
# ------------------------------------------------------------------------------
|
| 163 |
# | [ADVANCED] Proxy Configuration |
|
| 164 |
# ------------------------------------------------------------------------------
|
|
|
|
| 159 |
MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
|
| 160 |
MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
|
| 161 |
|
| 162 |
+
# --- Credential Rotation Mode ---
|
| 163 |
+
# Controls how credentials are rotated when multiple are available for a provider.
|
| 164 |
+
# This affects how the proxy selects the next credential to use for requests.
|
| 165 |
+
#
|
| 166 |
+
# Available modes:
|
| 167 |
+
# balanced - (Default) Rotate credentials evenly across requests to distribute load.
|
| 168 |
+
# Best for API keys with per-minute rate limits.
|
| 169 |
+
# sequential - Use one credential until it's exhausted (429 error), then switch to next.
|
| 170 |
+
# Best for credentials with daily/weekly quotas (e.g., free tier accounts).
|
| 171 |
+
# When a credential hits quota, it's put on cooldown based on the reset time
|
| 172 |
+
# parsed from the provider's error response.
|
| 173 |
+
#
|
| 174 |
+
# Format: ROTATION_MODE_<PROVIDER_NAME>=<mode>
|
| 175 |
+
#
|
| 176 |
+
# Provider Defaults:
|
| 177 |
+
# - antigravity: sequential (free tier accounts with daily quotas)
|
| 178 |
+
# - All others: balanced
|
| 179 |
+
#
|
| 180 |
+
# Example:
|
| 181 |
+
# ROTATION_MODE_GEMINI=sequential # Use Gemini keys until quota exhausted
|
| 182 |
+
# ROTATION_MODE_OPENAI=balanced # Distribute load across OpenAI keys (default)
|
| 183 |
+
# ROTATION_MODE_ANTIGRAVITY=balanced # Override Antigravity's sequential default
|
| 184 |
+
#
|
| 185 |
+
# ROTATION_MODE_GEMINI=balanced
|
| 186 |
+
# ROTATION_MODE_ANTIGRAVITY=sequential
|
| 187 |
+
|
| 188 |
+
# --- Priority-Based Concurrency Multipliers ---
|
| 189 |
+
# Credentials can be assigned to priority tiers (1=highest, 2, 3, etc.).
|
| 190 |
+
# Each tier can have a concurrency multiplier that increases the effective
|
| 191 |
+
# concurrent request limit for credentials in that tier.
|
| 192 |
+
#
|
| 193 |
+
# How it works:
|
| 194 |
+
# effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY * tier_multiplier
|
| 195 |
+
#
|
| 196 |
+
# This allows paid/premium credentials to handle more concurrent requests than
|
| 197 |
+
# free tier credentials, regardless of rotation mode.
|
| 198 |
+
#
|
| 199 |
+
# Provider Defaults (built into provider classes):
|
| 200 |
+
# Antigravity:
|
| 201 |
+
# Priority 1: 5x (paid ultra tier)
|
| 202 |
+
# Priority 2: 3x (standard paid tier)
|
| 203 |
+
# Priority 3+: 2x (sequential mode) or 1x (balanced mode)
|
| 204 |
+
# Gemini CLI:
|
| 205 |
+
# Priority 1: 5x
|
| 206 |
+
# Priority 2: 3x
|
| 207 |
+
# Others: 1x (all modes)
|
| 208 |
+
#
|
| 209 |
+
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
|
| 210 |
+
#
|
| 211 |
+
# Mode-specific overrides (optional):
|
| 212 |
+
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
|
| 213 |
+
#
|
| 214 |
+
# Examples:
|
| 215 |
+
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10 # Override P1 to 10x
|
| 216 |
+
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1 # Override P3 to 1x
|
| 217 |
+
# CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1 # P2 = 1x in balanced mode only
|
| 218 |
+
|
| 219 |
+
# --- Model Quota Groups ---
|
| 220 |
+
# Models that share quota/cooldown timing. When one model in a group hits
|
| 221 |
+
# quota exhausted (429), all models in the group receive the same cooldown timestamp.
|
| 222 |
+
# They also reset (archive stats) together when the quota period expires.
|
| 223 |
+
#
|
| 224 |
+
# This is useful for providers where multiple model variants share the same
|
| 225 |
+
# underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
|
| 226 |
+
#
|
| 227 |
+
# Format: QUOTA_GROUPS_<PROVIDER>_<GROUP>="model1,model2,model3"
|
| 228 |
+
#
|
| 229 |
+
# To DISABLE a default group, set it to empty string:
|
| 230 |
+
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
|
| 231 |
+
#
|
| 232 |
+
# Default groups:
|
| 233 |
+
# ANTIGRAVITY.CLAUDE: claude-sonnet-4-5,claude-opus-4-5
|
| 234 |
+
#
|
| 235 |
+
# Examples:
|
| 236 |
+
# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
|
| 237 |
+
# QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
|
| 238 |
+
|
| 239 |
# ------------------------------------------------------------------------------
|
| 240 |
# | [ADVANCED] Proxy Configuration |
|
| 241 |
# ------------------------------------------------------------------------------
|
DOCUMENTATION.md
CHANGED
|
@@ -96,22 +96,30 @@ The `_safe_streaming_wrapper` is a critical component for stability. It:
|
|
| 96 |
|
| 97 |
### 2.2. `usage_manager.py` - Stateful Concurrency & Usage Management
|
| 98 |
|
| 99 |
-
This class is the stateful core of the library, managing concurrency, usage tracking, and
|
| 100 |
|
| 101 |
#### Key Concepts
|
| 102 |
|
| 103 |
* **Async-Native & Lazy-Loaded**: Fully asynchronous, using `aiofiles` for non-blocking file I/O. Usage data is loaded only when needed.
|
| 104 |
* **Fine-Grained Locking**: Each API key has its own `asyncio.Lock` and `asyncio.Condition`. This allows for highly granular control.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
#### Tiered Key Acquisition Strategy
|
| 107 |
|
| 108 |
The `acquire_key` method uses a sophisticated strategy to balance load:
|
| 109 |
|
| 110 |
1. **Filtering**: Keys currently on cooldown (global or model-specific) are excluded.
|
| 111 |
-
2. **
|
|
|
|
|
|
|
|
|
|
| 112 |
* **Tier 1 (Ideal)**: Keys that are completely idle (0 concurrent requests).
|
| 113 |
* **Tier 2 (Acceptable)**: Keys that are busy but still under their configured `MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER>` limit for the requested model. This allows a single key to be used multiple times for the same model, maximizing throughput.
|
| 114 |
-
|
| 115 |
* **Deterministic (tolerance=0.0)**: Within each tier, keys are sorted by daily usage count and the least-used key is always selected. This provides perfect load balance but predictable patterns.
|
| 116 |
* **Weighted Random (tolerance>0, default)**: Keys are selected randomly with weights biased toward less-used ones:
|
| 117 |
- Formula: `weight = (max_usage - credential_usage) + tolerance + 1`
|
|
@@ -119,14 +127,19 @@ The `acquire_key` method uses a sophisticated strategy to balance load:
|
|
| 119 |
- `tolerance=5.0+`: High randomness - even heavily-used credentials have significant probability
|
| 120 |
- **Security Benefit**: Unpredictable selection patterns make rate limit detection and fingerprinting harder
|
| 121 |
- **Load Balance**: Lower-usage credentials still preferred, maintaining reasonable distribution
|
| 122 |
-
|
| 123 |
-
|
| 124 |
|
| 125 |
#### Failure Handling & Cooldowns
|
| 126 |
|
| 127 |
* **Escalating Backoff**: When a failure occurs, the key gets a temporary cooldown for that specific model. Consecutive failures increase this time (10s -> 30s -> 60s -> 120s).
|
| 128 |
* **Key-Level Lockouts**: If a key accumulates failures across multiple distinct models (3+), it is assumed to be dead/revoked and placed on a global 5-minute lockout.
|
| 129 |
* **Authentication Errors**: Immediate 5-minute global lockout.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
### 2.3. `batch_manager.py` - Efficient Request Aggregation
|
| 132 |
|
|
@@ -406,6 +419,10 @@ The most sophisticated provider implementation, supporting Google's internal Ant
|
|
| 406 |
- **Thought Signature Caching**: Server-side caching of encrypted signatures for multi-turn Gemini 3 conversations
|
| 407 |
- **Model-Specific Logic**: Automatic configuration based on model type (Gemini 3, Claude Sonnet, Claude Opus)
|
| 408 |
- **Credential Prioritization**: Automatic tier detection with paid credentials prioritized over free (paid tier resets every 5 hours, free tier resets weekly)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
#### Model Support
|
| 411 |
|
|
@@ -585,6 +602,221 @@ cache/
|
|
| 585 |
|
| 586 |
---
|
| 587 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 588 |
### 2.12. Google OAuth Base (`providers/google_oauth_base.py`)
|
| 589 |
|
| 590 |
A refactored, reusable OAuth2 base class that eliminates code duplication across Google-based providers.
|
|
@@ -637,6 +869,12 @@ The library handles provider idiosyncrasies through specialized "Provider" class
|
|
| 637 |
|
| 638 |
The `GeminiCliProvider` is the most complex implementation, mimicking the Google Cloud Code extension.
|
| 639 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 640 |
#### Authentication (`gemini_auth_base.py`)
|
| 641 |
|
| 642 |
* **Device Flow**: Uses a standard OAuth 2.0 flow. The `credential_tool` spins up a local web server (`localhost:8085`) to capture the callback from Google's auth page.
|
|
|
|
| 96 |
|
| 97 |
### 2.2. `usage_manager.py` - Stateful Concurrency & Usage Management
|
| 98 |
|
| 99 |
+
This class is the stateful core of the library, managing concurrency, usage tracking, cooldowns, and quota resets.
|
| 100 |
|
| 101 |
#### Key Concepts
|
| 102 |
|
| 103 |
* **Async-Native & Lazy-Loaded**: Fully asynchronous, using `aiofiles` for non-blocking file I/O. Usage data is loaded only when needed.
|
| 104 |
* **Fine-Grained Locking**: Each API key has its own `asyncio.Lock` and `asyncio.Condition`. This allows for highly granular control.
|
| 105 |
+
* **Multiple Reset Modes**: Supports three reset strategies:
|
| 106 |
+
- **per_model**: Each model has independent usage window with authoritative `quota_reset_ts` (from provider errors)
|
| 107 |
+
- **credential**: One window per credential with custom duration (e.g., 5 hours, 7 days)
|
| 108 |
+
- **daily**: Legacy daily reset at `daily_reset_time_utc`
|
| 109 |
+
* **Model Quota Groups**: Models can be grouped to share quota limits. When one model in a group hits quota, all receive the same reset timestamp.
|
| 110 |
|
| 111 |
#### Tiered Key Acquisition Strategy
|
| 112 |
|
| 113 |
The `acquire_key` method uses a sophisticated strategy to balance load:
|
| 114 |
|
| 115 |
1. **Filtering**: Keys currently on cooldown (global or model-specific) are excluded.
|
| 116 |
+
2. **Rotation Mode**: Determines credential selection strategy:
|
| 117 |
+
* **Balanced Mode** (default): Credentials sorted by usage count - least-used first for even distribution
|
| 118 |
+
* **Sequential Mode**: Credentials sorted by usage count descending - most-used first to maintain sticky behavior until exhausted
|
| 119 |
+
3. **Tiering**: Valid keys are split into two tiers:
|
| 120 |
* **Tier 1 (Ideal)**: Keys that are completely idle (0 concurrent requests).
|
| 121 |
* **Tier 2 (Acceptable)**: Keys that are busy but still under their configured `MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER>` limit for the requested model. This allows a single key to be used multiple times for the same model, maximizing throughput.
|
| 122 |
+
4. **Selection Strategy** (configurable via `rotation_tolerance`):
|
| 123 |
* **Deterministic (tolerance=0.0)**: Within each tier, keys are sorted by daily usage count and the least-used key is always selected. This provides perfect load balance but predictable patterns.
|
| 124 |
* **Weighted Random (tolerance>0, default)**: Keys are selected randomly with weights biased toward less-used ones:
|
| 125 |
- Formula: `weight = (max_usage - credential_usage) + tolerance + 1`
|
|
|
|
| 127 |
- `tolerance=5.0+`: High randomness - even heavily-used credentials have significant probability
|
| 128 |
- **Security Benefit**: Unpredictable selection patterns make rate limit detection and fingerprinting harder
|
| 129 |
- **Load Balance**: Lower-usage credentials still preferred, maintaining reasonable distribution
|
| 130 |
+
5. **Concurrency Limits**: Checks against `max_concurrent` limits (with priority multipliers applied) to prevent overloading a single key.
|
| 131 |
+
6. **Priority Groups**: When credential prioritization is enabled, higher-tier credentials (lower priority numbers) are tried first before moving to lower tiers.
|
| 132 |
|
| 133 |
#### Failure Handling & Cooldowns
|
| 134 |
|
| 135 |
* **Escalating Backoff**: When a failure occurs, the key gets a temporary cooldown for that specific model. Consecutive failures increase this time (10s -> 30s -> 60s -> 120s).
|
| 136 |
* **Key-Level Lockouts**: If a key accumulates failures across multiple distinct models (3+), it is assumed to be dead/revoked and placed on a global 5-minute lockout.
|
| 137 |
* **Authentication Errors**: Immediate 5-minute global lockout.
|
| 138 |
+
* **Quota Exhausted Errors**: When a provider returns a quota exhausted error with an authoritative reset timestamp:
|
| 139 |
+
- The `quota_reset_ts` is extracted from the error response (via provider's `parse_quota_error()` method)
|
| 140 |
+
- Applied to the affected model (and all models in its quota group if defined)
|
| 141 |
+
- Cooldown preserved even during daily/window resets until the actual quota reset time
|
| 142 |
+
- Logs show the exact reset time in local timezone with ISO format
|
| 143 |
|
| 144 |
### 2.3. `batch_manager.py` - Efficient Request Aggregation
|
| 145 |
|
|
|
|
| 419 |
- **Thought Signature Caching**: Server-side caching of encrypted signatures for multi-turn Gemini 3 conversations
|
| 420 |
- **Model-Specific Logic**: Automatic configuration based on model type (Gemini 3, Claude Sonnet, Claude Opus)
|
| 421 |
- **Credential Prioritization**: Automatic tier detection with paid credentials prioritized over free (paid tier resets every 5 hours, free tier resets weekly)
|
| 422 |
+
- **Sequential Rotation Mode**: Default rotation mode is sequential (use credentials until exhausted) to maximize thought signature cache hits
|
| 423 |
+
- **Per-Model Quota Tracking**: Each model tracks independent usage windows with authoritative reset timestamps from quota errors
|
| 424 |
+
- **Quota Groups**: Claude models (Sonnet 4.5 + Opus 4.5) can be grouped to share quota limits (disabled by default, configurable via `QUOTA_GROUPS_ANTIGRAVITY_CLAUDE`)
|
| 425 |
+
- **Priority Multipliers**: Paid tier credentials get higher concurrency limits (Priority 1: 5x, Priority 2: 3x, Priority 3+: 2x in sequential mode)
|
| 426 |
|
| 427 |
#### Model Support
|
| 428 |
|
|
|
|
| 602 |
|
| 603 |
---
|
| 604 |
|
| 605 |
+
### 2.13. Sequential Rotation & Per-Model Quota Tracking
|
| 606 |
+
|
| 607 |
+
A comprehensive credential rotation and quota management system introduced in PR #31.
|
| 608 |
+
|
| 609 |
+
#### Rotation Modes
|
| 610 |
+
|
| 611 |
+
Two rotation strategies are available per provider:
|
| 612 |
+
|
| 613 |
+
**Balanced Mode (Default)**:
|
| 614 |
+
- Distributes load evenly across all credentials
|
| 615 |
+
- Least-used credentials selected first
|
| 616 |
+
- Best for providers with per-minute rate limits
|
| 617 |
+
- Prevents any single credential from being overused
|
| 618 |
+
|
| 619 |
+
**Sequential Mode**:
|
| 620 |
+
- Uses one credential until it's exhausted (429 quota error)
|
| 621 |
+
- Switches to next credential only after current one fails
|
| 622 |
+
- Most-used credentials selected first (sticky behavior)
|
| 623 |
+
- Best for providers with daily/weekly quotas
|
| 624 |
+
- Maximizes cache hit rates (e.g., Antigravity thought signatures)
|
| 625 |
+
- Default for Antigravity provider
|
| 626 |
+
|
| 627 |
+
**Configuration**:
|
| 628 |
+
```env
|
| 629 |
+
# Set per provider
|
| 630 |
+
ROTATION_MODE_GEMINI=sequential
|
| 631 |
+
ROTATION_MODE_OPENAI=balanced
|
| 632 |
+
ROTATION_MODE_ANTIGRAVITY=balanced # Override default
|
| 633 |
+
```
|
| 634 |
+
|
| 635 |
+
#### Per-Model Quota Tracking
|
| 636 |
+
|
| 637 |
+
Instead of tracking usage at the credential level, the system now supports granular per-model tracking:
|
| 638 |
+
|
| 639 |
+
**Data Structure** (when `mode="per_model"`):
|
| 640 |
+
```json
|
| 641 |
+
{
|
| 642 |
+
"credential_id": {
|
| 643 |
+
"models": {
|
| 644 |
+
"gemini-2.5-pro": {
|
| 645 |
+
"window_start_ts": 1733678400.0,
|
| 646 |
+
"quota_reset_ts": 1733696400.0,
|
| 647 |
+
"success_count": 15,
|
| 648 |
+
"prompt_tokens": 5000,
|
| 649 |
+
"completion_tokens": 1000,
|
| 650 |
+
"approx_cost": 0.05,
|
| 651 |
+
"window_started": "2025-12-08 14:00:00 +0100",
|
| 652 |
+
"quota_resets": "2025-12-08 19:00:00 +0100"
|
| 653 |
+
}
|
| 654 |
+
},
|
| 655 |
+
"global": {...},
|
| 656 |
+
"model_cooldowns": {...}
|
| 657 |
+
}
|
| 658 |
+
}
|
| 659 |
+
```
|
| 660 |
+
|
| 661 |
+
**Key Features**:
|
| 662 |
+
- Each model tracks its own usage window independently
|
| 663 |
+
- `window_start_ts`: When the current quota period started
|
| 664 |
+
- `quota_reset_ts`: Authoritative reset time from provider error response
|
| 665 |
+
- Human-readable timestamps added for debugging
|
| 666 |
+
- Supports custom window durations (5h, 7d, etc.)
|
| 667 |
+
|
| 668 |
+
#### Provider-Specific Quota Parsing
|
| 669 |
+
|
| 670 |
+
Providers can implement `parse_quota_error()` to extract precise reset times from error responses:
|
| 671 |
+
|
| 672 |
+
```python
|
| 673 |
+
@staticmethod
|
| 674 |
+
def parse_quota_error(error, error_body) -> Optional[Dict]:
|
| 675 |
+
"""Extract quota reset timestamp from provider error.
|
| 676 |
+
|
| 677 |
+
Returns:
|
| 678 |
+
{
|
| 679 |
+
'quota_reset_timestamp': 1733696400.0, # Unix timestamp
|
| 680 |
+
'retry_after': 18000 # Seconds until reset
|
| 681 |
+
}
|
| 682 |
+
"""
|
| 683 |
+
```
|
| 684 |
+
|
| 685 |
+
**Google RPC Format** (Antigravity, Gemini CLI):
|
| 686 |
+
- Parses `RetryInfo` and `ErrorInfo` from error details
|
| 687 |
+
- Handles duration strings: `"143h4m52.73s"` or `"515092.73s"`
|
| 688 |
+
- Extracts `quotaResetTimeStamp` and converts to Unix timestamp
|
| 689 |
+
- Falls back to `quotaResetDelay` if timestamp not available
|
| 690 |
+
|
| 691 |
+
**Example Error Response**:
|
| 692 |
+
```json
|
| 693 |
+
{
|
| 694 |
+
"error": {
|
| 695 |
+
"code": 429,
|
| 696 |
+
"message": "Quota exceeded",
|
| 697 |
+
"details": [{
|
| 698 |
+
"@type": "type.googleapis.com/google.rpc.RetryInfo",
|
| 699 |
+
"retryDelay": "143h4m52.73s"
|
| 700 |
+
}, {
|
| 701 |
+
"@type": "type.googleapis.com/google.rpc.ErrorInfo",
|
| 702 |
+
"metadata": {
|
| 703 |
+
"quotaResetTimeStamp": "2025-12-08T19:00:00Z"
|
| 704 |
+
}
|
| 705 |
+
}]
|
| 706 |
+
}
|
| 707 |
+
}
|
| 708 |
+
```
|
| 709 |
+
|
| 710 |
+
#### Model Quota Groups
|
| 711 |
+
|
| 712 |
+
Models that share the same quota limits can be grouped:
|
| 713 |
+
|
| 714 |
+
**Configuration**:
|
| 715 |
+
```env
|
| 716 |
+
# Models in a group share quota/cooldown timing
|
| 717 |
+
QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
|
| 718 |
+
|
| 719 |
+
# To disable a default group:
|
| 720 |
+
QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
|
| 721 |
+
```
|
| 722 |
+
|
| 723 |
+
**Behavior**:
|
| 724 |
+
- When one model hits quota, all models in the group receive the same `quota_reset_ts`
|
| 725 |
+
- Combined weighted usage for credential selection (e.g., Opus counts 2x vs Sonnet)
|
| 726 |
+
- Group resets only when ALL models' quotas have reset
|
| 727 |
+
- Preserves unexpired cooldowns during other resets
|
| 728 |
+
|
| 729 |
+
**Provider Implementation**:
|
| 730 |
+
```python
|
| 731 |
+
class AntigravityProvider(ProviderInterface):
|
| 732 |
+
model_quota_groups = {
|
| 733 |
+
"claude": ["claude-sonnet-4-5", "claude-opus-4-5"]
|
| 734 |
+
}
|
| 735 |
+
|
| 736 |
+
model_usage_weights = {
|
| 737 |
+
"claude-opus-4-5": 2 # Opus counts 2x vs Sonnet
|
| 738 |
+
}
|
| 739 |
+
```
|
| 740 |
+
|
| 741 |
+
#### Priority-Based Concurrency Multipliers
|
| 742 |
+
|
| 743 |
+
Credentials can be assigned to priority tiers with configurable concurrency limits:
|
| 744 |
+
|
| 745 |
+
**Configuration**:
|
| 746 |
+
```env
|
| 747 |
+
# Universal multipliers (all modes)
|
| 748 |
+
CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10
|
| 749 |
+
CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2=3
|
| 750 |
+
|
| 751 |
+
# Mode-specific overrides
|
| 752 |
+
CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1 # Lower in balanced mode
|
| 753 |
+
```
|
| 754 |
+
|
| 755 |
+
**How it works**:
|
| 756 |
+
```python
|
| 757 |
+
effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY * tier_multiplier
|
| 758 |
+
```
|
| 759 |
+
|
| 760 |
+
**Provider Defaults** (Antigravity):
|
| 761 |
+
- Priority 1 (paid ultra): 5x multiplier
|
| 762 |
+
- Priority 2 (standard paid): 3x multiplier
|
| 763 |
+
- Priority 3+ (free): 2x (sequential mode) or 1x (balanced mode)
|
| 764 |
+
|
| 765 |
+
**Benefits**:
|
| 766 |
+
- Paid credentials handle more load without manual configuration
|
| 767 |
+
- Different concurrency for different rotation modes
|
| 768 |
+
- Automatic tier detection based on credential properties
|
| 769 |
+
|
| 770 |
+
#### Reset Window Configuration
|
| 771 |
+
|
| 772 |
+
Providers can specify custom reset windows per priority tier:
|
| 773 |
+
|
| 774 |
+
```python
|
| 775 |
+
class AntigravityProvider(ProviderInterface):
|
| 776 |
+
usage_reset_configs = {
|
| 777 |
+
frozenset([1, 2]): UsageResetConfigDef(
|
| 778 |
+
mode="per_model",
|
| 779 |
+
window_hours=5, # 5-hour rolling window for paid tiers
|
| 780 |
+
field_name="5h_window"
|
| 781 |
+
),
|
| 782 |
+
frozenset([3, 4, 5]): UsageResetConfigDef(
|
| 783 |
+
mode="per_model",
|
| 784 |
+
window_hours=168, # 7-day window for free tier
|
| 785 |
+
field_name="7d_window"
|
| 786 |
+
)
|
| 787 |
+
}
|
| 788 |
+
```
|
| 789 |
+
|
| 790 |
+
**Supported Modes**:
|
| 791 |
+
- `per_model`: Independent window per model with authoritative reset times
|
| 792 |
+
- `credential`: Single window per credential (legacy)
|
| 793 |
+
- `daily`: Daily reset at configured UTC hour (legacy)
|
| 794 |
+
|
| 795 |
+
#### Usage Flow
|
| 796 |
+
|
| 797 |
+
1. **Request arrives** for model X with credential Y
|
| 798 |
+
2. **Check rotation mode**: Sequential or balanced?
|
| 799 |
+
3. **Select credential**:
|
| 800 |
+
- Filter by priority tier requirements
|
| 801 |
+
- Apply concurrency multiplier for effective limit
|
| 802 |
+
- Sort by rotation mode strategy
|
| 803 |
+
4. **Check quota**:
|
| 804 |
+
- Load model's usage data
|
| 805 |
+
- Check if within window (window_start_ts to quota_reset_ts)
|
| 806 |
+
- Check model quota groups for combined usage
|
| 807 |
+
5. **Execute request**
|
| 808 |
+
6. **On success**: Increment model usage count
|
| 809 |
+
7. **On quota error**:
|
| 810 |
+
- Parse error for `quota_reset_ts`
|
| 811 |
+
- Apply to model (and quota group)
|
| 812 |
+
- Credential remains on cooldown until reset time
|
| 813 |
+
8. **On window expiration**:
|
| 814 |
+
- Archive model data to global stats
|
| 815 |
+
- Start fresh window with new `window_start_ts`
|
| 816 |
+
- Preserve unexpired quota cooldowns
|
| 817 |
+
|
| 818 |
+
---
|
| 819 |
+
|
| 820 |
### 2.12. Google OAuth Base (`providers/google_oauth_base.py`)
|
| 821 |
|
| 822 |
A refactored, reusable OAuth2 base class that eliminates code duplication across Google-based providers.
|
|
|
|
| 869 |
|
| 870 |
The `GeminiCliProvider` is the most complex implementation, mimicking the Google Cloud Code extension.
|
| 871 |
|
| 872 |
+
**New in PR #31**:
|
| 873 |
+
- **Quota Parsing**: Implements `parse_quota_error()` using Google RPC format parser
|
| 874 |
+
- **Tier Configuration**: Defines `tier_priorities` and `usage_reset_configs` for automatic priority resolution
|
| 875 |
+
- **Balanced Rotation**: Defaults to balanced mode (unlike Antigravity which uses sequential)
|
| 876 |
+
- **Priority Multipliers**: Same as Antigravity (P1: 5x, P2: 3x, others: 1x)
|
| 877 |
+
|
| 878 |
#### Authentication (`gemini_auth_base.py`)
|
| 879 |
|
| 880 |
* **Device Flow**: Uses a standard OAuth 2.0 flow. The `credential_tool` spins up a local web server (`localhost:8085`) to capture the callback from Google's auth page.
|
README.md
CHANGED
|
@@ -38,6 +38,12 @@ This project provides a powerful solution for developers building complex applic
|
|
| 38 |
- Automatic thinking block sanitization for Claude models (with recovery strategies)
|
| 39 |
- Note: Claude thinking mode requires careful conversation state management (see [Antigravity documentation](DOCUMENTATION.md#antigravity-claude-extended-thinking-sanitization) for details)
|
| 40 |
- **🆕 Credential Prioritization**: Automatic tier detection and priority-based credential selection ensures paid-tier credentials are used for premium models that require them.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
- **🆕 Weighted Random Rotation**: Configurable credential rotation strategy - choose between deterministic (perfect balance) or weighted random (unpredictable, harder to fingerprint) selection.
|
| 42 |
- **🆕 Enhanced Gemini CLI**: Improved project discovery, paid vs free tier detection, and Gemini 3 support with thoughtSignature caching.
|
| 43 |
- **🆕 Temperature Override**: Global temperature=0 override option to prevent tool hallucination issues with low-temperature settings.
|
|
@@ -129,6 +135,8 @@ The proxy now includes a powerful **interactive Text User Interface (TUI)** that
|
|
| 129 |
- Configure custom OpenAI-compatible providers
|
| 130 |
- Define provider models (simple or advanced JSON format)
|
| 131 |
- Set concurrency limits per provider
|
|
|
|
|
|
|
| 132 |
- Interactive numbered menus for easy selection
|
| 133 |
- Pending changes system with save/discard options
|
| 134 |
|
|
@@ -545,6 +553,47 @@ ANTIGRAVITY_GEMINI3_TOOL_FIX=true # Prevent tool hallucination
|
|
| 545 |
|
| 546 |
```
|
| 547 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
#### Concurrency Control
|
| 549 |
|
| 550 |
- **`MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER>`**: Set the maximum number of simultaneous requests allowed per API key for a specific provider. Default is `1` (no concurrency). Useful for high-throughput providers.
|
|
|
|
| 38 |
- Automatic thinking block sanitization for Claude models (with recovery strategies)
|
| 39 |
- Note: Claude thinking mode requires careful conversation state management (see [Antigravity documentation](DOCUMENTATION.md#antigravity-claude-extended-thinking-sanitization) for details)
|
| 40 |
- **🆕 Credential Prioritization**: Automatic tier detection and priority-based credential selection ensures paid-tier credentials are used for premium models that require them.
|
| 41 |
+
- **🆕 Sequential Rotation Mode**: Choose between balanced (distribute load evenly) or sequential (use until exhausted) credential rotation strategies. Sequential mode maximizes cache hit rates for providers like Antigravity.
|
| 42 |
+
- **🆕 Per-Model Quota Tracking**: Granular per-model usage tracking with authoritative quota reset timestamps from provider error responses. Each model maintains its own window with `window_start_ts` and `quota_reset_ts`.
|
| 43 |
+
- **🆕 Model Quota Groups**: Group models that share quota limits (e.g., Claude Sonnet and Opus). When one model in a group hits quota, all receive the same cooldown timestamp.
|
| 44 |
+
- **🆕 Priority-Based Concurrency**: Assign credentials to priority tiers (1=highest) with configurable concurrency multipliers. Paid-tier credentials can handle more concurrent requests than free-tier ones.
|
| 45 |
+
- **🆕 Provider-Specific Quota Parsing**: Extended provider interface with `parse_quota_error()` method to extract precise retry-after times from provider-specific error formats (e.g., Google RPC format).
|
| 46 |
+
- **🆕 Flexible Rolling Windows**: Support for provider-specific quota reset configurations (5-hour, 7-day, etc.) replacing hardcoded daily resets.
|
| 47 |
- **🆕 Weighted Random Rotation**: Configurable credential rotation strategy - choose between deterministic (perfect balance) or weighted random (unpredictable, harder to fingerprint) selection.
|
| 48 |
- **🆕 Enhanced Gemini CLI**: Improved project discovery, paid vs free tier detection, and Gemini 3 support with thoughtSignature caching.
|
| 49 |
- **🆕 Temperature Override**: Global temperature=0 override option to prevent tool hallucination issues with low-temperature settings.
|
|
|
|
| 135 |
- Configure custom OpenAI-compatible providers
|
| 136 |
- Define provider models (simple or advanced JSON format)
|
| 137 |
- Set concurrency limits per provider
|
| 138 |
+
- Configure rotation modes (balanced vs sequential)
|
| 139 |
+
- Manage priority-based concurrency multipliers
|
| 140 |
- Interactive numbered menus for easy selection
|
| 141 |
- Pending changes system with save/discard options
|
| 142 |
|
|
|
|
| 553 |
|
| 554 |
```
|
| 555 |
|
| 556 |
+
#### Credential Rotation Modes
|
| 557 |
+
|
| 558 |
+
- **`ROTATION_MODE_<PROVIDER>`**: Controls how credentials are rotated when multiple are available. Default: `balanced` (except Antigravity which defaults to `sequential`).
|
| 559 |
+
- `balanced`: Rotate credentials evenly across requests to distribute load. Best for per-minute rate limits.
|
| 560 |
+
- `sequential`: Use one credential until exhausted (429 error), then switch to next. Best for daily/weekly quotas.
|
| 561 |
+
```env
|
| 562 |
+
ROTATION_MODE_GEMINI=sequential # Use Gemini keys until quota exhausted
|
| 563 |
+
ROTATION_MODE_OPENAI=balanced # Distribute load across OpenAI keys (default)
|
| 564 |
+
ROTATION_MODE_ANTIGRAVITY=balanced # Override Antigravity's sequential default
|
| 565 |
+
```
|
| 566 |
+
|
| 567 |
+
#### Priority-Based Concurrency Multipliers
|
| 568 |
+
|
| 569 |
+
- **`CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>`**: Assign concurrency multipliers to priority tiers. Higher-tier credentials handle more concurrent requests.
|
| 570 |
+
```env
|
| 571 |
+
# Universal multipliers (apply to all rotation modes)
|
| 572 |
+
CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10 # 10x for paid ultra tier
|
| 573 |
+
CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1 # 1x for lower tiers
|
| 574 |
+
|
| 575 |
+
# Mode-specific overrides
|
| 576 |
+
CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1 # P2 = 1x in balanced mode only
|
| 577 |
+
```
|
| 578 |
+
|
| 579 |
+
**Provider Defaults** (built into provider classes):
|
| 580 |
+
- **Antigravity**: Priority 1: 5x, Priority 2: 3x, Priority 3+: 2x (sequential) or 1x (balanced)
|
| 581 |
+
- **Gemini CLI**: Priority 1: 5x, Priority 2: 3x, Others: 1x
|
| 582 |
+
|
| 583 |
+
#### Model Quota Groups
|
| 584 |
+
|
| 585 |
+
- **`QUOTA_GROUPS_<PROVIDER>_<GROUP>`**: Define models that share quota/cooldown timing. When one model hits quota, all in the group receive the same cooldown timestamp.
|
| 586 |
+
```env
|
| 587 |
+
QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
|
| 588 |
+
QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
|
| 589 |
+
|
| 590 |
+
# To disable a default group:
|
| 591 |
+
QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
|
| 592 |
+
```
|
| 593 |
+
|
| 594 |
+
**Default Groups**:
|
| 595 |
+
- **Antigravity**: Claude group (Sonnet 4.5 + Opus 4.5) with Opus counting 2x vs Sonnet
|
| 596 |
+
|
| 597 |
#### Concurrency Control
|
| 598 |
|
| 599 |
- **`MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER>`**: Set the maximum number of simultaneous requests allowed per API key for a specific provider. Default is `1` (no concurrency). Useful for high-throughput providers.
|
src/proxy_app/launcher_tui.py
CHANGED
|
@@ -18,32 +18,33 @@ console = Console()
|
|
| 18 |
|
| 19 |
def clear_screen():
|
| 20 |
"""
|
| 21 |
-
Cross-platform terminal clear that works robustly on both
|
| 22 |
classic Windows conhost and modern terminals (Windows Terminal, Linux, Mac).
|
| 23 |
-
|
| 24 |
Uses native OS commands instead of ANSI escape sequences:
|
| 25 |
- Windows (conhost & Windows Terminal): cls
|
| 26 |
- Unix-like systems (Linux, Mac): clear
|
| 27 |
"""
|
| 28 |
-
os.system(
|
|
|
|
| 29 |
|
| 30 |
class LauncherConfig:
|
| 31 |
"""Manages launcher_config.json (host, port, logging only)"""
|
| 32 |
-
|
| 33 |
def __init__(self, config_path: Path = Path("launcher_config.json")):
|
| 34 |
self.config_path = config_path
|
| 35 |
self.defaults = {
|
| 36 |
"host": "127.0.0.1",
|
| 37 |
"port": 8000,
|
| 38 |
-
"enable_request_logging": False
|
| 39 |
}
|
| 40 |
self.config = self.load()
|
| 41 |
-
|
| 42 |
def load(self) -> dict:
|
| 43 |
"""Load config from file or create with defaults."""
|
| 44 |
if self.config_path.exists():
|
| 45 |
try:
|
| 46 |
-
with open(self.config_path,
|
| 47 |
config = json.load(f)
|
| 48 |
# Merge with defaults for any missing keys
|
| 49 |
for key, value in self.defaults.items():
|
|
@@ -53,22 +54,23 @@ class LauncherConfig:
|
|
| 53 |
except (json.JSONDecodeError, IOError):
|
| 54 |
return self.defaults.copy()
|
| 55 |
return self.defaults.copy()
|
| 56 |
-
|
| 57 |
def save(self):
|
| 58 |
"""Save current config to file."""
|
| 59 |
import datetime
|
|
|
|
| 60 |
self.config["last_updated"] = datetime.datetime.now().isoformat()
|
| 61 |
try:
|
| 62 |
-
with open(self.config_path,
|
| 63 |
json.dump(self.config, f, indent=2)
|
| 64 |
except IOError as e:
|
| 65 |
console.print(f"[red]Error saving config: {e}[/red]")
|
| 66 |
-
|
| 67 |
def update(self, **kwargs):
|
| 68 |
"""Update config values."""
|
| 69 |
self.config.update(kwargs)
|
| 70 |
self.save()
|
| 71 |
-
|
| 72 |
@staticmethod
|
| 73 |
def update_proxy_api_key(new_key: str):
|
| 74 |
"""Update PROXY_API_KEY in .env only"""
|
|
@@ -79,7 +81,7 @@ class LauncherConfig:
|
|
| 79 |
|
| 80 |
class SettingsDetector:
|
| 81 |
"""Detects settings from .env for display"""
|
| 82 |
-
|
| 83 |
@staticmethod
|
| 84 |
def _load_local_env() -> dict:
|
| 85 |
"""Load environment variables from local .env file only"""
|
|
@@ -88,13 +90,13 @@ class SettingsDetector:
|
|
| 88 |
if not env_file.exists():
|
| 89 |
return env_dict
|
| 90 |
try:
|
| 91 |
-
with open(env_file,
|
| 92 |
for line in f:
|
| 93 |
line = line.strip()
|
| 94 |
-
if not line or line.startswith(
|
| 95 |
continue
|
| 96 |
-
if
|
| 97 |
-
key, _, value = line.partition(
|
| 98 |
key, value = key.strip(), value.strip()
|
| 99 |
if value and value[0] in ('"', "'") and value[-1] == value[0]:
|
| 100 |
value = value[1:-1]
|
|
@@ -112,16 +114,16 @@ class SettingsDetector:
|
|
| 112 |
"model_definitions": SettingsDetector.detect_model_definitions(),
|
| 113 |
"concurrency_limits": SettingsDetector.detect_concurrency_limits(),
|
| 114 |
"model_filters": SettingsDetector.detect_model_filters(),
|
| 115 |
-
"provider_settings": SettingsDetector.detect_provider_settings()
|
| 116 |
}
|
| 117 |
-
|
| 118 |
@staticmethod
|
| 119 |
def detect_credentials() -> dict:
|
| 120 |
"""Detect API keys and OAuth credentials"""
|
| 121 |
from pathlib import Path
|
| 122 |
-
|
| 123 |
providers = {}
|
| 124 |
-
|
| 125 |
# Scan for API keys
|
| 126 |
env_vars = SettingsDetector._load_local_env()
|
| 127 |
for key, value in env_vars.items():
|
|
@@ -130,7 +132,7 @@ class SettingsDetector:
|
|
| 130 |
if provider not in providers:
|
| 131 |
providers[provider] = {"api_keys": 0, "oauth": 0, "custom": False}
|
| 132 |
providers[provider]["api_keys"] += 1
|
| 133 |
-
|
| 134 |
# Scan for OAuth credentials
|
| 135 |
oauth_dir = Path("oauth_credentials")
|
| 136 |
if oauth_dir.exists():
|
|
@@ -139,19 +141,19 @@ class SettingsDetector:
|
|
| 139 |
if provider not in providers:
|
| 140 |
providers[provider] = {"api_keys": 0, "oauth": 0, "custom": False}
|
| 141 |
providers[provider]["oauth"] += 1
|
| 142 |
-
|
| 143 |
# Mark custom providers (have API_BASE set)
|
| 144 |
for provider in providers:
|
| 145 |
if os.getenv(f"{provider.upper()}_API_BASE"):
|
| 146 |
providers[provider]["custom"] = True
|
| 147 |
-
|
| 148 |
return providers
|
| 149 |
-
|
| 150 |
@staticmethod
|
| 151 |
def detect_custom_api_bases() -> dict:
|
| 152 |
"""Detect custom API base URLs (not in hardcoded map)"""
|
| 153 |
from proxy_app.provider_urls import PROVIDER_URL_MAP
|
| 154 |
-
|
| 155 |
bases = {}
|
| 156 |
env_vars = SettingsDetector._load_local_env()
|
| 157 |
for key, value in env_vars.items():
|
|
@@ -161,7 +163,7 @@ class SettingsDetector:
|
|
| 161 |
if provider not in PROVIDER_URL_MAP:
|
| 162 |
bases[provider] = value
|
| 163 |
return bases
|
| 164 |
-
|
| 165 |
@staticmethod
|
| 166 |
def detect_model_definitions() -> dict:
|
| 167 |
"""Detect provider model definitions"""
|
|
@@ -179,7 +181,7 @@ class SettingsDetector:
|
|
| 179 |
except (json.JSONDecodeError, ValueError):
|
| 180 |
pass
|
| 181 |
return models
|
| 182 |
-
|
| 183 |
@staticmethod
|
| 184 |
def detect_concurrency_limits() -> dict:
|
| 185 |
"""Detect max concurrent requests per key"""
|
|
@@ -193,7 +195,7 @@ class SettingsDetector:
|
|
| 193 |
except (json.JSONDecodeError, ValueError):
|
| 194 |
pass
|
| 195 |
return limits
|
| 196 |
-
|
| 197 |
@staticmethod
|
| 198 |
def detect_model_filters() -> dict:
|
| 199 |
"""Detect active model filters (basic info only: defined or not)"""
|
|
@@ -210,7 +212,7 @@ class SettingsDetector:
|
|
| 210 |
else:
|
| 211 |
filters[provider]["has_whitelist"] = True
|
| 212 |
return filters
|
| 213 |
-
|
| 214 |
@staticmethod
|
| 215 |
def detect_provider_settings() -> dict:
|
| 216 |
"""Detect provider-specific settings (Antigravity, Gemini CLI)"""
|
|
@@ -219,10 +221,10 @@ class SettingsDetector:
|
|
| 219 |
except ImportError:
|
| 220 |
# Fallback for direct execution or testing
|
| 221 |
from .settings_tool import PROVIDER_SETTINGS_MAP
|
| 222 |
-
|
| 223 |
provider_settings = {}
|
| 224 |
env_vars = SettingsDetector._load_local_env()
|
| 225 |
-
|
| 226 |
for provider, definitions in PROVIDER_SETTINGS_MAP.items():
|
| 227 |
modified_count = 0
|
| 228 |
for key, definition in definitions.items():
|
|
@@ -231,7 +233,7 @@ class SettingsDetector:
|
|
| 231 |
# Check if value differs from default
|
| 232 |
default = definition.get("default")
|
| 233 |
setting_type = definition.get("type", "str")
|
| 234 |
-
|
| 235 |
try:
|
| 236 |
if setting_type == "bool":
|
| 237 |
current = env_value.lower() in ("true", "1", "yes")
|
|
@@ -239,21 +241,21 @@ class SettingsDetector:
|
|
| 239 |
current = int(env_value)
|
| 240 |
else:
|
| 241 |
current = env_value
|
| 242 |
-
|
| 243 |
if current != default:
|
| 244 |
modified_count += 1
|
| 245 |
except (ValueError, AttributeError):
|
| 246 |
pass
|
| 247 |
-
|
| 248 |
if modified_count > 0:
|
| 249 |
provider_settings[provider] = modified_count
|
| 250 |
-
|
| 251 |
return provider_settings
|
| 252 |
|
| 253 |
|
| 254 |
class LauncherTUI:
|
| 255 |
"""Main launcher interface"""
|
| 256 |
-
|
| 257 |
def __init__(self):
|
| 258 |
self.console = Console()
|
| 259 |
self.config = LauncherConfig()
|
|
@@ -261,90 +263,100 @@ class LauncherTUI:
|
|
| 261 |
self.env_file = Path.cwd() / ".env"
|
| 262 |
# Load .env file to ensure environment variables are available
|
| 263 |
load_dotenv(dotenv_path=self.env_file, override=True)
|
| 264 |
-
|
| 265 |
def needs_onboarding(self) -> bool:
|
| 266 |
"""Check if onboarding is needed"""
|
| 267 |
return not self.env_file.exists() or not os.getenv("PROXY_API_KEY")
|
| 268 |
-
|
| 269 |
def run(self):
|
| 270 |
"""Main TUI loop"""
|
| 271 |
while self.running:
|
| 272 |
self.show_main_menu()
|
| 273 |
-
|
| 274 |
def show_main_menu(self):
|
| 275 |
"""Display main menu and handle selection"""
|
| 276 |
clear_screen()
|
| 277 |
-
|
| 278 |
# Detect all settings
|
| 279 |
settings = SettingsDetector.get_all_settings()
|
| 280 |
credentials = settings["credentials"]
|
| 281 |
custom_bases = settings["custom_bases"]
|
| 282 |
-
|
| 283 |
# Check if setup is needed
|
| 284 |
show_warning = self.needs_onboarding()
|
| 285 |
-
|
| 286 |
# Build title with GitHub link
|
| 287 |
-
self.console.print(
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
# Show warning if .env file doesn't exist
|
| 294 |
if show_warning:
|
| 295 |
self.console.print()
|
| 296 |
-
self.console.print(
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
|
|
|
|
|
|
| 315 |
# Show security warning if PROXY_API_KEY is missing (but .env exists)
|
| 316 |
elif not os.getenv("PROXY_API_KEY"):
|
| 317 |
self.console.print()
|
| 318 |
-
self.console.print(
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
|
|
|
|
|
|
| 333 |
# Show config
|
| 334 |
self.console.print()
|
| 335 |
self.console.print("[bold]📋 Proxy Configuration[/bold]")
|
| 336 |
self.console.print("━" * 70)
|
| 337 |
self.console.print(f" Host: {self.config.config['host']}")
|
| 338 |
self.console.print(f" Port: {self.config.config['port']}")
|
| 339 |
-
self.console.print(
|
| 340 |
-
|
|
|
|
|
|
|
| 341 |
# Show actual API key value
|
| 342 |
-
proxy_key = os.getenv(
|
| 343 |
if proxy_key:
|
| 344 |
self.console.print(f" Proxy API Key: {proxy_key}")
|
| 345 |
else:
|
| 346 |
self.console.print(" Proxy API Key: [red]Not Set (INSECURE!)[/red]")
|
| 347 |
-
|
| 348 |
# Show status summary
|
| 349 |
self.console.print()
|
| 350 |
self.console.print("[bold]📊 Status Summary[/bold]")
|
|
@@ -352,12 +364,19 @@ class LauncherTUI:
|
|
| 352 |
provider_count = len(credentials)
|
| 353 |
custom_count = len(custom_bases)
|
| 354 |
provider_settings = settings.get("provider_settings", {})
|
| 355 |
-
has_advanced = bool(
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
self.console.print(f" Providers: {provider_count} configured")
|
| 358 |
self.console.print(f" Custom Providers: {custom_count} configured")
|
| 359 |
-
self.console.print(
|
| 360 |
-
|
|
|
|
|
|
|
| 361 |
# Show menu
|
| 362 |
self.console.print()
|
| 363 |
self.console.print("━" * 70)
|
|
@@ -367,23 +386,29 @@ class LauncherTUI:
|
|
| 367 |
if show_warning:
|
| 368 |
self.console.print(" 1. ▶️ Run Proxy Server")
|
| 369 |
self.console.print(" 2. ⚙️ Configure Proxy Settings")
|
| 370 |
-
self.console.print(
|
|
|
|
|
|
|
| 371 |
else:
|
| 372 |
self.console.print(" 1. ▶️ Run Proxy Server")
|
| 373 |
self.console.print(" 2. ⚙️ Configure Proxy Settings")
|
| 374 |
self.console.print(" 3. 🔑 Manage Credentials")
|
| 375 |
-
|
| 376 |
self.console.print(" 4. 📊 View Provider & Advanced Settings")
|
| 377 |
self.console.print(" 5. 🔄 Reload Configuration")
|
| 378 |
self.console.print(" 6. ℹ️ About")
|
| 379 |
self.console.print(" 7. 🚪 Exit")
|
| 380 |
-
|
| 381 |
self.console.print()
|
| 382 |
self.console.print("━" * 70)
|
| 383 |
self.console.print()
|
| 384 |
-
|
| 385 |
-
choice = Prompt.ask(
|
| 386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
if choice == "1":
|
| 388 |
self.run_proxy()
|
| 389 |
elif choice == "2":
|
|
@@ -393,7 +418,7 @@ class LauncherTUI:
|
|
| 393 |
elif choice == "4":
|
| 394 |
self.show_provider_settings_menu()
|
| 395 |
elif choice == "5":
|
| 396 |
-
load_dotenv(dotenv_path=Path.cwd() / ".env",override=True)
|
| 397 |
self.config = LauncherConfig() # Reload config
|
| 398 |
self.console.print("\n[green]✅ Configuration reloaded![/green]")
|
| 399 |
elif choice == "6":
|
|
@@ -401,25 +426,64 @@ class LauncherTUI:
|
|
| 401 |
elif choice == "7":
|
| 402 |
self.running = False
|
| 403 |
sys.exit(0)
|
| 404 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
def show_config_menu(self):
|
| 406 |
"""Display configuration sub-menu"""
|
| 407 |
while True:
|
| 408 |
clear_screen()
|
| 409 |
-
|
| 410 |
-
self.console.print(
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
|
|
|
| 415 |
self.console.print()
|
| 416 |
self.console.print("[bold]📋 Current Settings[/bold]")
|
| 417 |
self.console.print("━" * 70)
|
| 418 |
self.console.print(f" Host: {self.config.config['host']}")
|
| 419 |
self.console.print(f" Port: {self.config.config['port']}")
|
| 420 |
-
self.console.print(
|
| 421 |
-
|
| 422 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
self.console.print()
|
| 424 |
self.console.print("━" * 70)
|
| 425 |
self.console.print()
|
|
@@ -429,45 +493,172 @@ class LauncherTUI:
|
|
| 429 |
self.console.print(" 2. 🔌 Set Port")
|
| 430 |
self.console.print(" 3. 🔑 Set Proxy API Key")
|
| 431 |
self.console.print(" 4. 📝 Toggle Request Logging")
|
| 432 |
-
self.console.print(" 5.
|
| 433 |
-
|
|
|
|
| 434 |
self.console.print()
|
| 435 |
self.console.print("━" * 70)
|
| 436 |
self.console.print()
|
| 437 |
-
|
| 438 |
-
choice = Prompt.ask(
|
| 439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
if choice == "1":
|
| 441 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
self.config.update(host=new_host)
|
| 443 |
self.console.print(f"\n[green]✅ Host updated to: {new_host}[/green]")
|
| 444 |
elif choice == "2":
|
| 445 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 446 |
if 1 <= new_port <= 65535:
|
| 447 |
self.config.update(port=new_port)
|
| 448 |
-
self.console.print(
|
|
|
|
|
|
|
| 449 |
else:
|
| 450 |
self.console.print("\n[red]❌ Port must be between 1-65535[/red]")
|
| 451 |
elif choice == "3":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
current = os.getenv("PROXY_API_KEY", "")
|
| 453 |
-
new_key = Prompt.ask(
|
| 454 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
LauncherConfig.update_proxy_api_key(new_key)
|
| 456 |
-
|
| 457 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
else:
|
| 459 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 460 |
elif choice == "4":
|
| 461 |
current = self.config.config["enable_request_logging"]
|
| 462 |
self.config.update(enable_request_logging=not current)
|
| 463 |
-
self.console.print(
|
|
|
|
|
|
|
| 464 |
elif choice == "5":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
break
|
| 466 |
-
|
| 467 |
def show_provider_settings_menu(self):
|
| 468 |
"""Display provider/advanced settings (read-only + launch tool)"""
|
| 469 |
clear_screen()
|
| 470 |
-
|
| 471 |
settings = SettingsDetector.get_all_settings()
|
| 472 |
credentials = settings["credentials"]
|
| 473 |
custom_bases = settings["custom_bases"]
|
|
@@ -475,12 +666,14 @@ class LauncherTUI:
|
|
| 475 |
concurrency = settings["concurrency_limits"]
|
| 476 |
filters = settings["model_filters"]
|
| 477 |
provider_settings = settings.get("provider_settings", {})
|
| 478 |
-
|
| 479 |
-
self.console.print(
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
|
|
|
|
|
|
| 484 |
# Configured Providers
|
| 485 |
self.console.print()
|
| 486 |
self.console.print("[bold]📊 Configured Providers[/bold]")
|
|
@@ -490,18 +683,22 @@ class LauncherTUI:
|
|
| 490 |
provider_name = provider.title()
|
| 491 |
parts = []
|
| 492 |
if info["api_keys"] > 0:
|
| 493 |
-
parts.append(
|
|
|
|
|
|
|
| 494 |
if info["oauth"] > 0:
|
| 495 |
-
parts.append(
|
| 496 |
-
|
|
|
|
|
|
|
| 497 |
display = " + ".join(parts)
|
| 498 |
if info["custom"]:
|
| 499 |
display += " (Custom)"
|
| 500 |
-
|
| 501 |
self.console.print(f" ✅ {provider_name:20} {display}")
|
| 502 |
else:
|
| 503 |
self.console.print(" [dim]No providers configured[/dim]")
|
| 504 |
-
|
| 505 |
# Custom API Bases
|
| 506 |
if custom_bases:
|
| 507 |
self.console.print()
|
|
@@ -509,15 +706,17 @@ class LauncherTUI:
|
|
| 509 |
self.console.print("━" * 70)
|
| 510 |
for provider, base in custom_bases.items():
|
| 511 |
self.console.print(f" • {provider:15} {base}")
|
| 512 |
-
|
| 513 |
# Model Definitions
|
| 514 |
if model_defs:
|
| 515 |
self.console.print()
|
| 516 |
self.console.print("[bold]📦 Provider Model Definitions[/bold]")
|
| 517 |
self.console.print("━" * 70)
|
| 518 |
for provider, count in model_defs.items():
|
| 519 |
-
self.console.print(
|
| 520 |
-
|
|
|
|
|
|
|
| 521 |
# Concurrency Limits
|
| 522 |
if concurrency:
|
| 523 |
self.console.print()
|
|
@@ -526,7 +725,7 @@ class LauncherTUI:
|
|
| 526 |
for provider, limit in concurrency.items():
|
| 527 |
self.console.print(f" • {provider:15} {limit} requests/key")
|
| 528 |
self.console.print(" • Default: 1 request/key (all others)")
|
| 529 |
-
|
| 530 |
# Model Filters (basic info only)
|
| 531 |
if filters:
|
| 532 |
self.console.print()
|
|
@@ -540,7 +739,7 @@ class LauncherTUI:
|
|
| 540 |
status_parts.append("Ignore list")
|
| 541 |
status = " + ".join(status_parts) if status_parts else "None"
|
| 542 |
self.console.print(f" • {provider:15} ✅ {status}")
|
| 543 |
-
|
| 544 |
# Provider-Specific Settings
|
| 545 |
self.console.print()
|
| 546 |
self.console.print("[bold]🔬 Provider-Specific Settings[/bold]")
|
|
@@ -553,158 +752,207 @@ class LauncherTUI:
|
|
| 553 |
display_name = provider.replace("_", " ").title()
|
| 554 |
modified = provider_settings.get(provider, 0)
|
| 555 |
if modified > 0:
|
| 556 |
-
self.console.print(
|
|
|
|
|
|
|
| 557 |
else:
|
| 558 |
self.console.print(f" • {display_name:20} [dim]using defaults[/dim]")
|
| 559 |
-
|
| 560 |
# Actions
|
| 561 |
self.console.print()
|
| 562 |
self.console.print("━" * 70)
|
| 563 |
self.console.print()
|
| 564 |
self.console.print("[bold]💡 Actions[/bold]")
|
| 565 |
self.console.print()
|
| 566 |
-
self.console.print(
|
|
|
|
|
|
|
| 567 |
self.console.print(" 2. ↩️ Back to Main Menu")
|
| 568 |
-
|
| 569 |
self.console.print()
|
| 570 |
self.console.print("━" * 70)
|
| 571 |
-
self.console.print(
|
|
|
|
|
|
|
| 572 |
self.console.print()
|
| 573 |
-
self.console.print(
|
|
|
|
|
|
|
| 574 |
self.console.print()
|
| 575 |
-
|
| 576 |
choice = Prompt.ask("Select option", choices=["1", "2"], show_choices=False)
|
| 577 |
-
|
| 578 |
if choice == "1":
|
| 579 |
self.launch_settings_tool()
|
| 580 |
# choice == "2" returns to main menu
|
| 581 |
-
|
| 582 |
def launch_credential_tool(self):
|
| 583 |
"""Launch credential management tool"""
|
| 584 |
import time
|
| 585 |
-
|
| 586 |
# CRITICAL: Show full loading UI to replace the 6-7 second blank wait
|
| 587 |
clear_screen()
|
| 588 |
-
|
| 589 |
_start_time = time.time()
|
| 590 |
-
|
| 591 |
# Show the same header as standalone mode
|
| 592 |
self.console.print("━" * 70)
|
| 593 |
self.console.print("Interactive Credential Setup Tool")
|
| 594 |
self.console.print("GitHub: https://github.com/Mirrowel/LLM-API-Key-Proxy")
|
| 595 |
self.console.print("━" * 70)
|
| 596 |
self.console.print("Loading credential management components...")
|
| 597 |
-
|
| 598 |
# Now import with spinner (this is where the 6-7 second delay happens)
|
| 599 |
with self.console.status("Initializing credential tool...", spinner="dots"):
|
| 600 |
-
from rotator_library.credential_tool import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
_, PROVIDER_PLUGINS = _ensure_providers_loaded()
|
| 602 |
self.console.print("✓ Credential tool initialized")
|
| 603 |
|
| 604 |
_elapsed = time.time() - _start_time
|
| 605 |
-
self.console.print(
|
| 606 |
-
|
|
|
|
|
|
|
| 607 |
# Small delay to let user see the ready message
|
| 608 |
time.sleep(0.5)
|
| 609 |
-
|
| 610 |
# Run the tool with from_launcher=True to skip duplicate loading screen
|
| 611 |
run_credential_tool(from_launcher=True)
|
| 612 |
# Reload environment after credential tool
|
| 613 |
load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
|
| 614 |
-
|
| 615 |
def launch_settings_tool(self):
|
| 616 |
"""Launch settings configuration tool"""
|
| 617 |
from proxy_app.settings_tool import run_settings_tool
|
|
|
|
| 618 |
run_settings_tool()
|
| 619 |
# Reload environment after settings tool
|
| 620 |
load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
|
| 621 |
-
|
| 622 |
def show_about(self):
|
| 623 |
"""Display About page with project information"""
|
| 624 |
clear_screen()
|
| 625 |
-
|
| 626 |
-
self.console.print(
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
|
|
|
| 631 |
self.console.print()
|
| 632 |
self.console.print("[bold]📦 Project Information[/bold]")
|
| 633 |
self.console.print("━" * 70)
|
| 634 |
self.console.print(" [bold cyan]LLM API Key Proxy[/bold cyan]")
|
| 635 |
-
self.console.print(
|
|
|
|
|
|
|
| 636 |
self.console.print(" LLM API keys with automatic rotation and OAuth support")
|
| 637 |
self.console.print()
|
| 638 |
-
self.console.print(
|
| 639 |
-
|
|
|
|
|
|
|
| 640 |
self.console.print()
|
| 641 |
self.console.print("[bold]✨ Key Features[/bold]")
|
| 642 |
self.console.print("━" * 70)
|
| 643 |
-
self.console.print(
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
self.console.print(
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
self.console.print(
|
| 650 |
-
|
| 651 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 652 |
self.console.print()
|
| 653 |
self.console.print("[bold]📝 License & Credits[/bold]")
|
| 654 |
self.console.print("━" * 70)
|
| 655 |
self.console.print(" Made with ❤️ by the community")
|
| 656 |
self.console.print(" Open source - contributions welcome!")
|
| 657 |
-
|
| 658 |
self.console.print()
|
| 659 |
self.console.print("━" * 70)
|
| 660 |
self.console.print()
|
| 661 |
-
|
| 662 |
Prompt.ask("Press Enter to return to main menu", default="")
|
| 663 |
-
|
| 664 |
def run_proxy(self):
|
| 665 |
"""Prepare and launch proxy in same window"""
|
| 666 |
# Check if forced onboarding needed
|
| 667 |
if self.needs_onboarding():
|
| 668 |
clear_screen()
|
| 669 |
-
self.console.print(
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
|
|
|
|
|
|
| 678 |
# Force credential tool
|
| 679 |
-
from rotator_library.credential_tool import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
ensure_env_defaults()
|
| 681 |
load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
|
| 682 |
run_credential_tool()
|
| 683 |
load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
|
| 684 |
-
|
| 685 |
# Check again after credential tool
|
| 686 |
if not os.getenv("PROXY_API_KEY"):
|
| 687 |
-
self.console.print(
|
|
|
|
|
|
|
| 688 |
return
|
| 689 |
-
|
| 690 |
# Clear console and modify sys.argv
|
| 691 |
clear_screen()
|
| 692 |
-
self.console.print(
|
| 693 |
-
|
|
|
|
|
|
|
| 694 |
# Clear console again to remove the starting message before main.py shows loading details
|
| 695 |
import time
|
|
|
|
| 696 |
time.sleep(0.5) # Brief pause so user sees the message
|
| 697 |
clear_screen()
|
| 698 |
-
|
| 699 |
# Reconstruct sys.argv for main.py
|
| 700 |
sys.argv = [
|
| 701 |
"main.py",
|
| 702 |
-
"--host",
|
| 703 |
-
|
|
|
|
|
|
|
| 704 |
]
|
| 705 |
if self.config.config["enable_request_logging"]:
|
| 706 |
sys.argv.append("--enable-request-logging")
|
| 707 |
-
|
| 708 |
# Exit TUI - main.py will continue execution
|
| 709 |
self.running = False
|
| 710 |
|
|
|
|
| 18 |
|
| 19 |
def clear_screen():
|
| 20 |
"""
|
| 21 |
+
Cross-platform terminal clear that works robustly on both
|
| 22 |
classic Windows conhost and modern terminals (Windows Terminal, Linux, Mac).
|
| 23 |
+
|
| 24 |
Uses native OS commands instead of ANSI escape sequences:
|
| 25 |
- Windows (conhost & Windows Terminal): cls
|
| 26 |
- Unix-like systems (Linux, Mac): clear
|
| 27 |
"""
|
| 28 |
+
os.system("cls" if os.name == "nt" else "clear")
|
| 29 |
+
|
| 30 |
|
| 31 |
class LauncherConfig:
|
| 32 |
"""Manages launcher_config.json (host, port, logging only)"""
|
| 33 |
+
|
| 34 |
def __init__(self, config_path: Path = Path("launcher_config.json")):
|
| 35 |
self.config_path = config_path
|
| 36 |
self.defaults = {
|
| 37 |
"host": "127.0.0.1",
|
| 38 |
"port": 8000,
|
| 39 |
+
"enable_request_logging": False,
|
| 40 |
}
|
| 41 |
self.config = self.load()
|
| 42 |
+
|
| 43 |
def load(self) -> dict:
|
| 44 |
"""Load config from file or create with defaults."""
|
| 45 |
if self.config_path.exists():
|
| 46 |
try:
|
| 47 |
+
with open(self.config_path, "r") as f:
|
| 48 |
config = json.load(f)
|
| 49 |
# Merge with defaults for any missing keys
|
| 50 |
for key, value in self.defaults.items():
|
|
|
|
| 54 |
except (json.JSONDecodeError, IOError):
|
| 55 |
return self.defaults.copy()
|
| 56 |
return self.defaults.copy()
|
| 57 |
+
|
| 58 |
def save(self):
|
| 59 |
"""Save current config to file."""
|
| 60 |
import datetime
|
| 61 |
+
|
| 62 |
self.config["last_updated"] = datetime.datetime.now().isoformat()
|
| 63 |
try:
|
| 64 |
+
with open(self.config_path, "w") as f:
|
| 65 |
json.dump(self.config, f, indent=2)
|
| 66 |
except IOError as e:
|
| 67 |
console.print(f"[red]Error saving config: {e}[/red]")
|
| 68 |
+
|
| 69 |
def update(self, **kwargs):
|
| 70 |
"""Update config values."""
|
| 71 |
self.config.update(kwargs)
|
| 72 |
self.save()
|
| 73 |
+
|
| 74 |
@staticmethod
|
| 75 |
def update_proxy_api_key(new_key: str):
|
| 76 |
"""Update PROXY_API_KEY in .env only"""
|
|
|
|
| 81 |
|
| 82 |
class SettingsDetector:
|
| 83 |
"""Detects settings from .env for display"""
|
| 84 |
+
|
| 85 |
@staticmethod
|
| 86 |
def _load_local_env() -> dict:
|
| 87 |
"""Load environment variables from local .env file only"""
|
|
|
|
| 90 |
if not env_file.exists():
|
| 91 |
return env_dict
|
| 92 |
try:
|
| 93 |
+
with open(env_file, "r", encoding="utf-8") as f:
|
| 94 |
for line in f:
|
| 95 |
line = line.strip()
|
| 96 |
+
if not line or line.startswith("#"):
|
| 97 |
continue
|
| 98 |
+
if "=" in line:
|
| 99 |
+
key, _, value = line.partition("=")
|
| 100 |
key, value = key.strip(), value.strip()
|
| 101 |
if value and value[0] in ('"', "'") and value[-1] == value[0]:
|
| 102 |
value = value[1:-1]
|
|
|
|
| 114 |
"model_definitions": SettingsDetector.detect_model_definitions(),
|
| 115 |
"concurrency_limits": SettingsDetector.detect_concurrency_limits(),
|
| 116 |
"model_filters": SettingsDetector.detect_model_filters(),
|
| 117 |
+
"provider_settings": SettingsDetector.detect_provider_settings(),
|
| 118 |
}
|
| 119 |
+
|
| 120 |
@staticmethod
|
| 121 |
def detect_credentials() -> dict:
|
| 122 |
"""Detect API keys and OAuth credentials"""
|
| 123 |
from pathlib import Path
|
| 124 |
+
|
| 125 |
providers = {}
|
| 126 |
+
|
| 127 |
# Scan for API keys
|
| 128 |
env_vars = SettingsDetector._load_local_env()
|
| 129 |
for key, value in env_vars.items():
|
|
|
|
| 132 |
if provider not in providers:
|
| 133 |
providers[provider] = {"api_keys": 0, "oauth": 0, "custom": False}
|
| 134 |
providers[provider]["api_keys"] += 1
|
| 135 |
+
|
| 136 |
# Scan for OAuth credentials
|
| 137 |
oauth_dir = Path("oauth_credentials")
|
| 138 |
if oauth_dir.exists():
|
|
|
|
| 141 |
if provider not in providers:
|
| 142 |
providers[provider] = {"api_keys": 0, "oauth": 0, "custom": False}
|
| 143 |
providers[provider]["oauth"] += 1
|
| 144 |
+
|
| 145 |
# Mark custom providers (have API_BASE set)
|
| 146 |
for provider in providers:
|
| 147 |
if os.getenv(f"{provider.upper()}_API_BASE"):
|
| 148 |
providers[provider]["custom"] = True
|
| 149 |
+
|
| 150 |
return providers
|
| 151 |
+
|
| 152 |
@staticmethod
|
| 153 |
def detect_custom_api_bases() -> dict:
|
| 154 |
"""Detect custom API base URLs (not in hardcoded map)"""
|
| 155 |
from proxy_app.provider_urls import PROVIDER_URL_MAP
|
| 156 |
+
|
| 157 |
bases = {}
|
| 158 |
env_vars = SettingsDetector._load_local_env()
|
| 159 |
for key, value in env_vars.items():
|
|
|
|
| 163 |
if provider not in PROVIDER_URL_MAP:
|
| 164 |
bases[provider] = value
|
| 165 |
return bases
|
| 166 |
+
|
| 167 |
@staticmethod
|
| 168 |
def detect_model_definitions() -> dict:
|
| 169 |
"""Detect provider model definitions"""
|
|
|
|
| 181 |
except (json.JSONDecodeError, ValueError):
|
| 182 |
pass
|
| 183 |
return models
|
| 184 |
+
|
| 185 |
@staticmethod
|
| 186 |
def detect_concurrency_limits() -> dict:
|
| 187 |
"""Detect max concurrent requests per key"""
|
|
|
|
| 195 |
except (json.JSONDecodeError, ValueError):
|
| 196 |
pass
|
| 197 |
return limits
|
| 198 |
+
|
| 199 |
@staticmethod
|
| 200 |
def detect_model_filters() -> dict:
|
| 201 |
"""Detect active model filters (basic info only: defined or not)"""
|
|
|
|
| 212 |
else:
|
| 213 |
filters[provider]["has_whitelist"] = True
|
| 214 |
return filters
|
| 215 |
+
|
| 216 |
@staticmethod
|
| 217 |
def detect_provider_settings() -> dict:
|
| 218 |
"""Detect provider-specific settings (Antigravity, Gemini CLI)"""
|
|
|
|
| 221 |
except ImportError:
|
| 222 |
# Fallback for direct execution or testing
|
| 223 |
from .settings_tool import PROVIDER_SETTINGS_MAP
|
| 224 |
+
|
| 225 |
provider_settings = {}
|
| 226 |
env_vars = SettingsDetector._load_local_env()
|
| 227 |
+
|
| 228 |
for provider, definitions in PROVIDER_SETTINGS_MAP.items():
|
| 229 |
modified_count = 0
|
| 230 |
for key, definition in definitions.items():
|
|
|
|
| 233 |
# Check if value differs from default
|
| 234 |
default = definition.get("default")
|
| 235 |
setting_type = definition.get("type", "str")
|
| 236 |
+
|
| 237 |
try:
|
| 238 |
if setting_type == "bool":
|
| 239 |
current = env_value.lower() in ("true", "1", "yes")
|
|
|
|
| 241 |
current = int(env_value)
|
| 242 |
else:
|
| 243 |
current = env_value
|
| 244 |
+
|
| 245 |
if current != default:
|
| 246 |
modified_count += 1
|
| 247 |
except (ValueError, AttributeError):
|
| 248 |
pass
|
| 249 |
+
|
| 250 |
if modified_count > 0:
|
| 251 |
provider_settings[provider] = modified_count
|
| 252 |
+
|
| 253 |
return provider_settings
|
| 254 |
|
| 255 |
|
| 256 |
class LauncherTUI:
|
| 257 |
"""Main launcher interface"""
|
| 258 |
+
|
| 259 |
def __init__(self):
|
| 260 |
self.console = Console()
|
| 261 |
self.config = LauncherConfig()
|
|
|
|
| 263 |
self.env_file = Path.cwd() / ".env"
|
| 264 |
# Load .env file to ensure environment variables are available
|
| 265 |
load_dotenv(dotenv_path=self.env_file, override=True)
|
| 266 |
+
|
| 267 |
def needs_onboarding(self) -> bool:
|
| 268 |
"""Check if onboarding is needed"""
|
| 269 |
return not self.env_file.exists() or not os.getenv("PROXY_API_KEY")
|
| 270 |
+
|
| 271 |
def run(self):
|
| 272 |
"""Main TUI loop"""
|
| 273 |
while self.running:
|
| 274 |
self.show_main_menu()
|
| 275 |
+
|
| 276 |
def show_main_menu(self):
|
| 277 |
"""Display main menu and handle selection"""
|
| 278 |
clear_screen()
|
| 279 |
+
|
| 280 |
# Detect all settings
|
| 281 |
settings = SettingsDetector.get_all_settings()
|
| 282 |
credentials = settings["credentials"]
|
| 283 |
custom_bases = settings["custom_bases"]
|
| 284 |
+
|
| 285 |
# Check if setup is needed
|
| 286 |
show_warning = self.needs_onboarding()
|
| 287 |
+
|
| 288 |
# Build title with GitHub link
|
| 289 |
+
self.console.print(
|
| 290 |
+
Panel.fit(
|
| 291 |
+
"[bold cyan]🚀 LLM API Key Proxy - Interactive Launcher[/bold cyan]",
|
| 292 |
+
border_style="cyan",
|
| 293 |
+
)
|
| 294 |
+
)
|
| 295 |
+
self.console.print(
|
| 296 |
+
"[dim]GitHub: [blue underline]https://github.com/Mirrowel/LLM-API-Key-Proxy[/blue underline][/dim]"
|
| 297 |
+
)
|
| 298 |
+
|
| 299 |
# Show warning if .env file doesn't exist
|
| 300 |
if show_warning:
|
| 301 |
self.console.print()
|
| 302 |
+
self.console.print(
|
| 303 |
+
Panel(
|
| 304 |
+
Text.from_markup(
|
| 305 |
+
"⚠️ [bold yellow]INITIAL SETUP REQUIRED[/bold yellow]\n\n"
|
| 306 |
+
"The proxy needs initial configuration:\n"
|
| 307 |
+
" ❌ No .env file found\n\n"
|
| 308 |
+
"Why this matters:\n"
|
| 309 |
+
" • The .env file stores your credentials and settings\n"
|
| 310 |
+
" • PROXY_API_KEY protects your proxy from unauthorized access\n"
|
| 311 |
+
" • Provider API keys enable LLM access\n\n"
|
| 312 |
+
"What to do:\n"
|
| 313 |
+
' 1. Select option "3. Manage Credentials" to launch the credential tool\n'
|
| 314 |
+
" 2. The tool will create .env and set up PROXY_API_KEY automatically\n"
|
| 315 |
+
" 3. You can add provider credentials (API keys or OAuth)\n\n"
|
| 316 |
+
"⚠️ Note: The credential tool adds PROXY_API_KEY by default.\n"
|
| 317 |
+
" You can remove it later if you want an unsecured proxy."
|
| 318 |
+
),
|
| 319 |
+
border_style="yellow",
|
| 320 |
+
expand=False,
|
| 321 |
+
)
|
| 322 |
+
)
|
| 323 |
# Show security warning if PROXY_API_KEY is missing (but .env exists)
|
| 324 |
elif not os.getenv("PROXY_API_KEY"):
|
| 325 |
self.console.print()
|
| 326 |
+
self.console.print(
|
| 327 |
+
Panel(
|
| 328 |
+
Text.from_markup(
|
| 329 |
+
"⚠️ [bold red]SECURITY WARNING: PROXY_API_KEY Not Set[/bold red]\n\n"
|
| 330 |
+
"Your proxy is currently UNSECURED!\n"
|
| 331 |
+
"Anyone can access it without authentication.\n\n"
|
| 332 |
+
"This is a serious security risk if your proxy is accessible\n"
|
| 333 |
+
"from the internet or untrusted networks.\n\n"
|
| 334 |
+
"👉 [bold]Recommended:[/bold] Set PROXY_API_KEY in .env file\n"
|
| 335 |
+
' Use option "2. Configure Proxy Settings" → "3. Set Proxy API Key"\n'
|
| 336 |
+
' or option "3. Manage Credentials"'
|
| 337 |
+
),
|
| 338 |
+
border_style="red",
|
| 339 |
+
expand=False,
|
| 340 |
+
)
|
| 341 |
+
)
|
| 342 |
+
|
| 343 |
# Show config
|
| 344 |
self.console.print()
|
| 345 |
self.console.print("[bold]📋 Proxy Configuration[/bold]")
|
| 346 |
self.console.print("━" * 70)
|
| 347 |
self.console.print(f" Host: {self.config.config['host']}")
|
| 348 |
self.console.print(f" Port: {self.config.config['port']}")
|
| 349 |
+
self.console.print(
|
| 350 |
+
f" Request Logging: {'✅ Enabled' if self.config.config['enable_request_logging'] else '❌ Disabled'}"
|
| 351 |
+
)
|
| 352 |
+
|
| 353 |
# Show actual API key value
|
| 354 |
+
proxy_key = os.getenv("PROXY_API_KEY")
|
| 355 |
if proxy_key:
|
| 356 |
self.console.print(f" Proxy API Key: {proxy_key}")
|
| 357 |
else:
|
| 358 |
self.console.print(" Proxy API Key: [red]Not Set (INSECURE!)[/red]")
|
| 359 |
+
|
| 360 |
# Show status summary
|
| 361 |
self.console.print()
|
| 362 |
self.console.print("[bold]📊 Status Summary[/bold]")
|
|
|
|
| 364 |
provider_count = len(credentials)
|
| 365 |
custom_count = len(custom_bases)
|
| 366 |
provider_settings = settings.get("provider_settings", {})
|
| 367 |
+
has_advanced = bool(
|
| 368 |
+
settings["model_definitions"]
|
| 369 |
+
or settings["concurrency_limits"]
|
| 370 |
+
or settings["model_filters"]
|
| 371 |
+
or provider_settings
|
| 372 |
+
)
|
| 373 |
+
|
| 374 |
self.console.print(f" Providers: {provider_count} configured")
|
| 375 |
self.console.print(f" Custom Providers: {custom_count} configured")
|
| 376 |
+
self.console.print(
|
| 377 |
+
f" Advanced Settings: {'Active (view in menu 4)' if has_advanced else 'None'}"
|
| 378 |
+
)
|
| 379 |
+
|
| 380 |
# Show menu
|
| 381 |
self.console.print()
|
| 382 |
self.console.print("━" * 70)
|
|
|
|
| 386 |
if show_warning:
|
| 387 |
self.console.print(" 1. ▶️ Run Proxy Server")
|
| 388 |
self.console.print(" 2. ⚙️ Configure Proxy Settings")
|
| 389 |
+
self.console.print(
|
| 390 |
+
" 3. 🔑 Manage Credentials ⬅️ [bold yellow]Start here![/bold yellow]"
|
| 391 |
+
)
|
| 392 |
else:
|
| 393 |
self.console.print(" 1. ▶️ Run Proxy Server")
|
| 394 |
self.console.print(" 2. ⚙️ Configure Proxy Settings")
|
| 395 |
self.console.print(" 3. 🔑 Manage Credentials")
|
| 396 |
+
|
| 397 |
self.console.print(" 4. 📊 View Provider & Advanced Settings")
|
| 398 |
self.console.print(" 5. 🔄 Reload Configuration")
|
| 399 |
self.console.print(" 6. ℹ️ About")
|
| 400 |
self.console.print(" 7. 🚪 Exit")
|
| 401 |
+
|
| 402 |
self.console.print()
|
| 403 |
self.console.print("━" * 70)
|
| 404 |
self.console.print()
|
| 405 |
+
|
| 406 |
+
choice = Prompt.ask(
|
| 407 |
+
"Select option",
|
| 408 |
+
choices=["1", "2", "3", "4", "5", "6", "7"],
|
| 409 |
+
show_choices=False,
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
if choice == "1":
|
| 413 |
self.run_proxy()
|
| 414 |
elif choice == "2":
|
|
|
|
| 418 |
elif choice == "4":
|
| 419 |
self.show_provider_settings_menu()
|
| 420 |
elif choice == "5":
|
| 421 |
+
load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
|
| 422 |
self.config = LauncherConfig() # Reload config
|
| 423 |
self.console.print("\n[green]✅ Configuration reloaded![/green]")
|
| 424 |
elif choice == "6":
|
|
|
|
| 426 |
elif choice == "7":
|
| 427 |
self.running = False
|
| 428 |
sys.exit(0)
|
| 429 |
+
|
| 430 |
+
def confirm_setting_change(self, setting_name: str, warning_lines: list) -> bool:
|
| 431 |
+
"""
|
| 432 |
+
Display a warning and require Y/N (case-sensitive) confirmation.
|
| 433 |
+
Re-prompts until user enters exactly 'Y' or 'N'.
|
| 434 |
+
Returns True only if user enters 'Y'.
|
| 435 |
+
"""
|
| 436 |
+
clear_screen()
|
| 437 |
+
self.console.print()
|
| 438 |
+
self.console.print(
|
| 439 |
+
Panel(
|
| 440 |
+
Text.from_markup(
|
| 441 |
+
f"[bold yellow]⚠️ WARNING: You are about to change the {setting_name}[/bold yellow]\n\n"
|
| 442 |
+
+ "\n".join(warning_lines)
|
| 443 |
+
+ "\n\n[bold]If you are not sure about changing this - don't.[/bold]"
|
| 444 |
+
),
|
| 445 |
+
border_style="yellow",
|
| 446 |
+
expand=False,
|
| 447 |
+
)
|
| 448 |
+
)
|
| 449 |
+
|
| 450 |
+
while True:
|
| 451 |
+
response = Prompt.ask(
|
| 452 |
+
"Enter [bold]Y[/bold] to confirm, [bold]N[/bold] to cancel (case-sensitive)"
|
| 453 |
+
)
|
| 454 |
+
if response == "Y":
|
| 455 |
+
return True
|
| 456 |
+
elif response == "N":
|
| 457 |
+
self.console.print("\n[dim]Operation cancelled.[/dim]")
|
| 458 |
+
return False
|
| 459 |
+
else:
|
| 460 |
+
self.console.print(
|
| 461 |
+
"[red]Please enter exactly 'Y' or 'N' (case-sensitive)[/red]"
|
| 462 |
+
)
|
| 463 |
+
|
| 464 |
def show_config_menu(self):
|
| 465 |
"""Display configuration sub-menu"""
|
| 466 |
while True:
|
| 467 |
clear_screen()
|
| 468 |
+
|
| 469 |
+
self.console.print(
|
| 470 |
+
Panel.fit(
|
| 471 |
+
"[bold cyan]⚙️ Proxy Configuration[/bold cyan]", border_style="cyan"
|
| 472 |
+
)
|
| 473 |
+
)
|
| 474 |
+
|
| 475 |
self.console.print()
|
| 476 |
self.console.print("[bold]📋 Current Settings[/bold]")
|
| 477 |
self.console.print("━" * 70)
|
| 478 |
self.console.print(f" Host: {self.config.config['host']}")
|
| 479 |
self.console.print(f" Port: {self.config.config['port']}")
|
| 480 |
+
self.console.print(
|
| 481 |
+
f" Request Logging: {'✅ Enabled' if self.config.config['enable_request_logging'] else '❌ Disabled'}"
|
| 482 |
+
)
|
| 483 |
+
self.console.print(
|
| 484 |
+
f" Proxy API Key: {'✅ Set' if os.getenv('PROXY_API_KEY') else '❌ Not Set'}"
|
| 485 |
+
)
|
| 486 |
+
|
| 487 |
self.console.print()
|
| 488 |
self.console.print("━" * 70)
|
| 489 |
self.console.print()
|
|
|
|
| 493 |
self.console.print(" 2. 🔌 Set Port")
|
| 494 |
self.console.print(" 3. 🔑 Set Proxy API Key")
|
| 495 |
self.console.print(" 4. 📝 Toggle Request Logging")
|
| 496 |
+
self.console.print(" 5. 🔄 Reset to Default Settings")
|
| 497 |
+
self.console.print(" 6. ↩️ Back to Main Menu")
|
| 498 |
+
|
| 499 |
self.console.print()
|
| 500 |
self.console.print("━" * 70)
|
| 501 |
self.console.print()
|
| 502 |
+
|
| 503 |
+
choice = Prompt.ask(
|
| 504 |
+
"Select option",
|
| 505 |
+
choices=["1", "2", "3", "4", "5", "6"],
|
| 506 |
+
show_choices=False,
|
| 507 |
+
)
|
| 508 |
+
|
| 509 |
if choice == "1":
|
| 510 |
+
# Show warning and require confirmation
|
| 511 |
+
confirmed = self.confirm_setting_change(
|
| 512 |
+
"Host IP",
|
| 513 |
+
[
|
| 514 |
+
"Changing the host IP affects which network interfaces the proxy listens on:",
|
| 515 |
+
" • [cyan]127.0.0.1[/cyan] = Local access only (recommended for development)",
|
| 516 |
+
" • [cyan]0.0.0.0[/cyan] = Accessible from all network interfaces",
|
| 517 |
+
"",
|
| 518 |
+
"Applications configured to connect to the old host may fail to connect.",
|
| 519 |
+
],
|
| 520 |
+
)
|
| 521 |
+
if not confirmed:
|
| 522 |
+
continue
|
| 523 |
+
|
| 524 |
+
new_host = Prompt.ask(
|
| 525 |
+
"Enter new host IP", default=self.config.config["host"]
|
| 526 |
+
)
|
| 527 |
self.config.update(host=new_host)
|
| 528 |
self.console.print(f"\n[green]✅ Host updated to: {new_host}[/green]")
|
| 529 |
elif choice == "2":
|
| 530 |
+
# Show warning and require confirmation
|
| 531 |
+
confirmed = self.confirm_setting_change(
|
| 532 |
+
"Port",
|
| 533 |
+
[
|
| 534 |
+
"Changing the port will affect all applications currently configured",
|
| 535 |
+
"to connect to your proxy on the existing port.",
|
| 536 |
+
"",
|
| 537 |
+
"Applications using the old port will fail to connect.",
|
| 538 |
+
],
|
| 539 |
+
)
|
| 540 |
+
if not confirmed:
|
| 541 |
+
continue
|
| 542 |
+
|
| 543 |
+
new_port = IntPrompt.ask(
|
| 544 |
+
"Enter new port", default=self.config.config["port"]
|
| 545 |
+
)
|
| 546 |
if 1 <= new_port <= 65535:
|
| 547 |
self.config.update(port=new_port)
|
| 548 |
+
self.console.print(
|
| 549 |
+
f"\n[green]✅ Port updated to: {new_port}[/green]"
|
| 550 |
+
)
|
| 551 |
else:
|
| 552 |
self.console.print("\n[red]❌ Port must be between 1-65535[/red]")
|
| 553 |
elif choice == "3":
|
| 554 |
+
# Show warning and require confirmation
|
| 555 |
+
confirmed = self.confirm_setting_change(
|
| 556 |
+
"Proxy API Key",
|
| 557 |
+
[
|
| 558 |
+
"This is the authentication key that applications use to access your proxy.",
|
| 559 |
+
"",
|
| 560 |
+
"[bold red]⚠️ Changing this will BREAK all applications currently configured",
|
| 561 |
+
" with the existing API key![/bold red]",
|
| 562 |
+
"",
|
| 563 |
+
"[bold cyan]💡 If you want to add provider API keys (OpenAI, Gemini, etc.),",
|
| 564 |
+
' go to "3. 🔑 Manage Credentials" in the main menu instead.[/bold cyan]',
|
| 565 |
+
],
|
| 566 |
+
)
|
| 567 |
+
if not confirmed:
|
| 568 |
+
continue
|
| 569 |
+
|
| 570 |
current = os.getenv("PROXY_API_KEY", "")
|
| 571 |
+
new_key = Prompt.ask(
|
| 572 |
+
"Enter new Proxy API Key (leave empty to disable authentication)",
|
| 573 |
+
default=current,
|
| 574 |
+
)
|
| 575 |
+
|
| 576 |
+
if new_key != current:
|
| 577 |
+
# If setting to empty, show additional warning
|
| 578 |
+
if not new_key:
|
| 579 |
+
self.console.print(
|
| 580 |
+
"\n[bold red]⚠️ Authentication will be DISABLED - anyone can access your proxy![/bold red]"
|
| 581 |
+
)
|
| 582 |
+
Prompt.ask("Press Enter to continue", default="")
|
| 583 |
+
|
| 584 |
LauncherConfig.update_proxy_api_key(new_key)
|
| 585 |
+
|
| 586 |
+
if new_key:
|
| 587 |
+
self.console.print(
|
| 588 |
+
"\n[green]✅ Proxy API Key updated successfully![/green]"
|
| 589 |
+
)
|
| 590 |
+
self.console.print(" Updated in .env file")
|
| 591 |
+
else:
|
| 592 |
+
self.console.print(
|
| 593 |
+
"\n[yellow]⚠️ Proxy API Key cleared - authentication disabled![/yellow]"
|
| 594 |
+
)
|
| 595 |
+
self.console.print(" Updated in .env file")
|
| 596 |
else:
|
| 597 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 598 |
elif choice == "4":
|
| 599 |
current = self.config.config["enable_request_logging"]
|
| 600 |
self.config.update(enable_request_logging=not current)
|
| 601 |
+
self.console.print(
|
| 602 |
+
f"\n[green]✅ Request Logging {'enabled' if not current else 'disabled'}![/green]"
|
| 603 |
+
)
|
| 604 |
elif choice == "5":
|
| 605 |
+
# Reset to Default Settings
|
| 606 |
+
# Define defaults
|
| 607 |
+
default_host = "127.0.0.1"
|
| 608 |
+
default_port = 8000
|
| 609 |
+
default_logging = False
|
| 610 |
+
default_api_key = "VerysecretKey"
|
| 611 |
+
|
| 612 |
+
# Get current values
|
| 613 |
+
current_host = self.config.config["host"]
|
| 614 |
+
current_port = self.config.config["port"]
|
| 615 |
+
current_logging = self.config.config["enable_request_logging"]
|
| 616 |
+
current_api_key = os.getenv("PROXY_API_KEY", "")
|
| 617 |
+
|
| 618 |
+
# Build comparison table
|
| 619 |
+
warning_lines = [
|
| 620 |
+
"This will reset ALL proxy settings to their defaults:",
|
| 621 |
+
"",
|
| 622 |
+
"[bold] Setting Current Value → Default Value[/bold]",
|
| 623 |
+
" " + "─" * 62,
|
| 624 |
+
f" Host IP {current_host:20} → {default_host}",
|
| 625 |
+
f" Port {str(current_port):20} → {default_port}",
|
| 626 |
+
f" Request Logging {'Enabled':20} → Disabled"
|
| 627 |
+
if current_logging
|
| 628 |
+
else f" Request Logging {'Disabled':20} → Disabled",
|
| 629 |
+
f" Proxy API Key {current_api_key[:20]:20} → {default_api_key}",
|
| 630 |
+
"",
|
| 631 |
+
"[bold red]⚠️ This may break applications configured with current settings![/bold red]",
|
| 632 |
+
]
|
| 633 |
+
|
| 634 |
+
confirmed = self.confirm_setting_change(
|
| 635 |
+
"Settings (Reset to Defaults)", warning_lines
|
| 636 |
+
)
|
| 637 |
+
if not confirmed:
|
| 638 |
+
continue
|
| 639 |
+
|
| 640 |
+
# Apply defaults
|
| 641 |
+
self.config.update(
|
| 642 |
+
host=default_host,
|
| 643 |
+
port=default_port,
|
| 644 |
+
enable_request_logging=default_logging,
|
| 645 |
+
)
|
| 646 |
+
LauncherConfig.update_proxy_api_key(default_api_key)
|
| 647 |
+
|
| 648 |
+
self.console.print(
|
| 649 |
+
"\n[green]✅ All settings have been reset to defaults![/green]"
|
| 650 |
+
)
|
| 651 |
+
self.console.print(f" Host: {default_host}")
|
| 652 |
+
self.console.print(f" Port: {default_port}")
|
| 653 |
+
self.console.print(f" Request Logging: Disabled")
|
| 654 |
+
self.console.print(f" Proxy API Key: {default_api_key}")
|
| 655 |
+
elif choice == "6":
|
| 656 |
break
|
| 657 |
+
|
| 658 |
def show_provider_settings_menu(self):
|
| 659 |
"""Display provider/advanced settings (read-only + launch tool)"""
|
| 660 |
clear_screen()
|
| 661 |
+
|
| 662 |
settings = SettingsDetector.get_all_settings()
|
| 663 |
credentials = settings["credentials"]
|
| 664 |
custom_bases = settings["custom_bases"]
|
|
|
|
| 666 |
concurrency = settings["concurrency_limits"]
|
| 667 |
filters = settings["model_filters"]
|
| 668 |
provider_settings = settings.get("provider_settings", {})
|
| 669 |
+
|
| 670 |
+
self.console.print(
|
| 671 |
+
Panel.fit(
|
| 672 |
+
"[bold cyan]📊 Provider & Advanced Settings[/bold cyan]",
|
| 673 |
+
border_style="cyan",
|
| 674 |
+
)
|
| 675 |
+
)
|
| 676 |
+
|
| 677 |
# Configured Providers
|
| 678 |
self.console.print()
|
| 679 |
self.console.print("[bold]📊 Configured Providers[/bold]")
|
|
|
|
| 683 |
provider_name = provider.title()
|
| 684 |
parts = []
|
| 685 |
if info["api_keys"] > 0:
|
| 686 |
+
parts.append(
|
| 687 |
+
f"{info['api_keys']} API key{'s' if info['api_keys'] > 1 else ''}"
|
| 688 |
+
)
|
| 689 |
if info["oauth"] > 0:
|
| 690 |
+
parts.append(
|
| 691 |
+
f"{info['oauth']} OAuth credential{'s' if info['oauth'] > 1 else ''}"
|
| 692 |
+
)
|
| 693 |
+
|
| 694 |
display = " + ".join(parts)
|
| 695 |
if info["custom"]:
|
| 696 |
display += " (Custom)"
|
| 697 |
+
|
| 698 |
self.console.print(f" ✅ {provider_name:20} {display}")
|
| 699 |
else:
|
| 700 |
self.console.print(" [dim]No providers configured[/dim]")
|
| 701 |
+
|
| 702 |
# Custom API Bases
|
| 703 |
if custom_bases:
|
| 704 |
self.console.print()
|
|
|
|
| 706 |
self.console.print("━" * 70)
|
| 707 |
for provider, base in custom_bases.items():
|
| 708 |
self.console.print(f" • {provider:15} {base}")
|
| 709 |
+
|
| 710 |
# Model Definitions
|
| 711 |
if model_defs:
|
| 712 |
self.console.print()
|
| 713 |
self.console.print("[bold]📦 Provider Model Definitions[/bold]")
|
| 714 |
self.console.print("━" * 70)
|
| 715 |
for provider, count in model_defs.items():
|
| 716 |
+
self.console.print(
|
| 717 |
+
f" • {provider:15} {count} model{'s' if count > 1 else ''} configured"
|
| 718 |
+
)
|
| 719 |
+
|
| 720 |
# Concurrency Limits
|
| 721 |
if concurrency:
|
| 722 |
self.console.print()
|
|
|
|
| 725 |
for provider, limit in concurrency.items():
|
| 726 |
self.console.print(f" • {provider:15} {limit} requests/key")
|
| 727 |
self.console.print(" • Default: 1 request/key (all others)")
|
| 728 |
+
|
| 729 |
# Model Filters (basic info only)
|
| 730 |
if filters:
|
| 731 |
self.console.print()
|
|
|
|
| 739 |
status_parts.append("Ignore list")
|
| 740 |
status = " + ".join(status_parts) if status_parts else "None"
|
| 741 |
self.console.print(f" • {provider:15} ✅ {status}")
|
| 742 |
+
|
| 743 |
# Provider-Specific Settings
|
| 744 |
self.console.print()
|
| 745 |
self.console.print("[bold]🔬 Provider-Specific Settings[/bold]")
|
|
|
|
| 752 |
display_name = provider.replace("_", " ").title()
|
| 753 |
modified = provider_settings.get(provider, 0)
|
| 754 |
if modified > 0:
|
| 755 |
+
self.console.print(
|
| 756 |
+
f" • {display_name:20} [yellow]{modified} setting{'s' if modified > 1 else ''} modified[/yellow]"
|
| 757 |
+
)
|
| 758 |
else:
|
| 759 |
self.console.print(f" • {display_name:20} [dim]using defaults[/dim]")
|
| 760 |
+
|
| 761 |
# Actions
|
| 762 |
self.console.print()
|
| 763 |
self.console.print("━" * 70)
|
| 764 |
self.console.print()
|
| 765 |
self.console.print("[bold]💡 Actions[/bold]")
|
| 766 |
self.console.print()
|
| 767 |
+
self.console.print(
|
| 768 |
+
" 1. 🔧 Launch Settings Tool (configure advanced settings)"
|
| 769 |
+
)
|
| 770 |
self.console.print(" 2. ↩️ Back to Main Menu")
|
| 771 |
+
|
| 772 |
self.console.print()
|
| 773 |
self.console.print("━" * 70)
|
| 774 |
+
self.console.print(
|
| 775 |
+
"[dim]ℹ️ Advanced settings are stored in .env file.\n Use the Settings Tool to configure them interactively.[/dim]"
|
| 776 |
+
)
|
| 777 |
self.console.print()
|
| 778 |
+
self.console.print(
|
| 779 |
+
"[dim]⚠️ Note: Settings Tool supports only common configuration types.\n For complex settings, edit .env directly.[/dim]"
|
| 780 |
+
)
|
| 781 |
self.console.print()
|
| 782 |
+
|
| 783 |
choice = Prompt.ask("Select option", choices=["1", "2"], show_choices=False)
|
| 784 |
+
|
| 785 |
if choice == "1":
|
| 786 |
self.launch_settings_tool()
|
| 787 |
# choice == "2" returns to main menu
|
| 788 |
+
|
| 789 |
def launch_credential_tool(self):
|
| 790 |
"""Launch credential management tool"""
|
| 791 |
import time
|
| 792 |
+
|
| 793 |
# CRITICAL: Show full loading UI to replace the 6-7 second blank wait
|
| 794 |
clear_screen()
|
| 795 |
+
|
| 796 |
_start_time = time.time()
|
| 797 |
+
|
| 798 |
# Show the same header as standalone mode
|
| 799 |
self.console.print("━" * 70)
|
| 800 |
self.console.print("Interactive Credential Setup Tool")
|
| 801 |
self.console.print("GitHub: https://github.com/Mirrowel/LLM-API-Key-Proxy")
|
| 802 |
self.console.print("━" * 70)
|
| 803 |
self.console.print("Loading credential management components...")
|
| 804 |
+
|
| 805 |
# Now import with spinner (this is where the 6-7 second delay happens)
|
| 806 |
with self.console.status("Initializing credential tool...", spinner="dots"):
|
| 807 |
+
from rotator_library.credential_tool import (
|
| 808 |
+
run_credential_tool,
|
| 809 |
+
_ensure_providers_loaded,
|
| 810 |
+
)
|
| 811 |
+
|
| 812 |
_, PROVIDER_PLUGINS = _ensure_providers_loaded()
|
| 813 |
self.console.print("✓ Credential tool initialized")
|
| 814 |
|
| 815 |
_elapsed = time.time() - _start_time
|
| 816 |
+
self.console.print(
|
| 817 |
+
f"✓ Tool ready in {_elapsed:.2f}s ({len(PROVIDER_PLUGINS)} providers available)"
|
| 818 |
+
)
|
| 819 |
+
|
| 820 |
# Small delay to let user see the ready message
|
| 821 |
time.sleep(0.5)
|
| 822 |
+
|
| 823 |
# Run the tool with from_launcher=True to skip duplicate loading screen
|
| 824 |
run_credential_tool(from_launcher=True)
|
| 825 |
# Reload environment after credential tool
|
| 826 |
load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
|
| 827 |
+
|
| 828 |
def launch_settings_tool(self):
|
| 829 |
"""Launch settings configuration tool"""
|
| 830 |
from proxy_app.settings_tool import run_settings_tool
|
| 831 |
+
|
| 832 |
run_settings_tool()
|
| 833 |
# Reload environment after settings tool
|
| 834 |
load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
|
| 835 |
+
|
| 836 |
def show_about(self):
|
| 837 |
"""Display About page with project information"""
|
| 838 |
clear_screen()
|
| 839 |
+
|
| 840 |
+
self.console.print(
|
| 841 |
+
Panel.fit(
|
| 842 |
+
"[bold cyan]ℹ️ About LLM API Key Proxy[/bold cyan]", border_style="cyan"
|
| 843 |
+
)
|
| 844 |
+
)
|
| 845 |
+
|
| 846 |
self.console.print()
|
| 847 |
self.console.print("[bold]📦 Project Information[/bold]")
|
| 848 |
self.console.print("━" * 70)
|
| 849 |
self.console.print(" [bold cyan]LLM API Key Proxy[/bold cyan]")
|
| 850 |
+
self.console.print(
|
| 851 |
+
" A lightweight, high-performance proxy server for managing"
|
| 852 |
+
)
|
| 853 |
self.console.print(" LLM API keys with automatic rotation and OAuth support")
|
| 854 |
self.console.print()
|
| 855 |
+
self.console.print(
|
| 856 |
+
" [dim]GitHub:[/dim] [blue underline]https://github.com/Mirrowel/LLM-API-Key-Proxy[/blue underline]"
|
| 857 |
+
)
|
| 858 |
+
|
| 859 |
self.console.print()
|
| 860 |
self.console.print("[bold]✨ Key Features[/bold]")
|
| 861 |
self.console.print("━" * 70)
|
| 862 |
+
self.console.print(
|
| 863 |
+
" • [green]Smart Key Rotation[/green] - Automatic rotation across multiple API keys"
|
| 864 |
+
)
|
| 865 |
+
self.console.print(
|
| 866 |
+
" • [green]OAuth Support[/green] - Automated OAuth flows for supported providers"
|
| 867 |
+
)
|
| 868 |
+
self.console.print(
|
| 869 |
+
" • [green]Multiple Providers[/green] - Support for 10+ LLM providers"
|
| 870 |
+
)
|
| 871 |
+
self.console.print(
|
| 872 |
+
" • [green]Custom Providers[/green] - Easy integration of custom OpenAI-compatible APIs"
|
| 873 |
+
)
|
| 874 |
+
self.console.print(
|
| 875 |
+
" • [green]Advanced Filtering[/green] - Model whitelists and ignore lists per provider"
|
| 876 |
+
)
|
| 877 |
+
self.console.print(
|
| 878 |
+
" • [green]Concurrency Control[/green] - Per-key rate limiting and request management"
|
| 879 |
+
)
|
| 880 |
+
self.console.print(
|
| 881 |
+
" • [green]Cost Tracking[/green] - Track usage and costs across all providers"
|
| 882 |
+
)
|
| 883 |
+
self.console.print(
|
| 884 |
+
" • [green]Interactive TUI[/green] - Beautiful terminal interface for easy configuration"
|
| 885 |
+
)
|
| 886 |
+
|
| 887 |
self.console.print()
|
| 888 |
self.console.print("[bold]📝 License & Credits[/bold]")
|
| 889 |
self.console.print("━" * 70)
|
| 890 |
self.console.print(" Made with ❤️ by the community")
|
| 891 |
self.console.print(" Open source - contributions welcome!")
|
| 892 |
+
|
| 893 |
self.console.print()
|
| 894 |
self.console.print("━" * 70)
|
| 895 |
self.console.print()
|
| 896 |
+
|
| 897 |
Prompt.ask("Press Enter to return to main menu", default="")
|
| 898 |
+
|
| 899 |
def run_proxy(self):
|
| 900 |
"""Prepare and launch proxy in same window"""
|
| 901 |
# Check if forced onboarding needed
|
| 902 |
if self.needs_onboarding():
|
| 903 |
clear_screen()
|
| 904 |
+
self.console.print(
|
| 905 |
+
Panel(
|
| 906 |
+
Text.from_markup(
|
| 907 |
+
"⚠️ [bold yellow]Setup Required[/bold yellow]\n\n"
|
| 908 |
+
"Cannot start without .env.\n"
|
| 909 |
+
"Launching credential tool..."
|
| 910 |
+
),
|
| 911 |
+
border_style="yellow",
|
| 912 |
+
)
|
| 913 |
+
)
|
| 914 |
+
|
| 915 |
# Force credential tool
|
| 916 |
+
from rotator_library.credential_tool import (
|
| 917 |
+
ensure_env_defaults,
|
| 918 |
+
run_credential_tool,
|
| 919 |
+
)
|
| 920 |
+
|
| 921 |
ensure_env_defaults()
|
| 922 |
load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
|
| 923 |
run_credential_tool()
|
| 924 |
load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
|
| 925 |
+
|
| 926 |
# Check again after credential tool
|
| 927 |
if not os.getenv("PROXY_API_KEY"):
|
| 928 |
+
self.console.print(
|
| 929 |
+
"\n[red]❌ PROXY_API_KEY still not set. Cannot start proxy.[/red]"
|
| 930 |
+
)
|
| 931 |
return
|
| 932 |
+
|
| 933 |
# Clear console and modify sys.argv
|
| 934 |
clear_screen()
|
| 935 |
+
self.console.print(
|
| 936 |
+
f"\n[bold green]🚀 Starting proxy on {self.config.config['host']}:{self.config.config['port']}...[/bold green]\n"
|
| 937 |
+
)
|
| 938 |
+
|
| 939 |
# Clear console again to remove the starting message before main.py shows loading details
|
| 940 |
import time
|
| 941 |
+
|
| 942 |
time.sleep(0.5) # Brief pause so user sees the message
|
| 943 |
clear_screen()
|
| 944 |
+
|
| 945 |
# Reconstruct sys.argv for main.py
|
| 946 |
sys.argv = [
|
| 947 |
"main.py",
|
| 948 |
+
"--host",
|
| 949 |
+
self.config.config["host"],
|
| 950 |
+
"--port",
|
| 951 |
+
str(self.config.config["port"]),
|
| 952 |
]
|
| 953 |
if self.config.config["enable_request_logging"]:
|
| 954 |
sys.argv.append("--enable-request-logging")
|
| 955 |
+
|
| 956 |
# Exit TUI - main.py will continue execution
|
| 957 |
self.running = False
|
| 958 |
|
src/proxy_app/main.py
CHANGED
|
@@ -10,10 +10,18 @@ import logging
|
|
| 10 |
|
| 11 |
# --- Argument Parsing (BEFORE heavy imports) ---
|
| 12 |
parser = argparse.ArgumentParser(description="API Key Proxy Server")
|
| 13 |
-
parser.add_argument(
|
|
|
|
|
|
|
| 14 |
parser.add_argument("--port", type=int, default=8000, help="Port to run the server on.")
|
| 15 |
-
parser.add_argument(
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
args, _ = parser.parse_known_args()
|
| 18 |
|
| 19 |
# Add the 'src' directory to the Python path
|
|
@@ -23,6 +31,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent))
|
|
| 23 |
if len(sys.argv) == 1:
|
| 24 |
# TUI MODE - Load ONLY what's needed for the launcher (fast path!)
|
| 25 |
from proxy_app.launcher_tui import run_launcher_tui
|
|
|
|
| 26 |
run_launcher_tui()
|
| 27 |
# Launcher modifies sys.argv and returns, or exits if user chose Exit
|
| 28 |
# If we get here, user chose "Run Proxy" and sys.argv is modified
|
|
@@ -32,6 +41,7 @@ if len(sys.argv) == 1:
|
|
| 32 |
# Check if credential tool mode (also doesn't need heavy proxy imports)
|
| 33 |
if args.add_credential:
|
| 34 |
from rotator_library.credential_tool import run_credential_tool
|
|
|
|
| 35 |
run_credential_tool()
|
| 36 |
sys.exit(0)
|
| 37 |
|
|
@@ -74,6 +84,7 @@ print("Loading server components...")
|
|
| 74 |
|
| 75 |
# Phase 2: Load Rich for loading spinner (lightweight)
|
| 76 |
from rich.console import Console
|
|
|
|
| 77 |
_console = Console()
|
| 78 |
|
| 79 |
# Phase 3: Heavy dependencies with granular loading messages
|
|
@@ -92,7 +103,7 @@ with _console.status("[dim]Loading core dependencies...", spinner="dots"):
|
|
| 92 |
import json
|
| 93 |
from typing import AsyncGenerator, Any, List, Optional, Union
|
| 94 |
from pydantic import BaseModel, Field
|
| 95 |
-
|
| 96 |
# --- Early Log Level Configuration ---
|
| 97 |
logging.getLogger("LiteLLM").setLevel(logging.WARNING)
|
| 98 |
|
|
@@ -100,7 +111,7 @@ print(" → Loading LiteLLM library...")
|
|
| 100 |
with _console.status("[dim]Loading LiteLLM library...", spinner="dots"):
|
| 101 |
import litellm
|
| 102 |
|
| 103 |
-
# Phase 4: Application imports with granular loading messages
|
| 104 |
print(" → Initializing proxy core...")
|
| 105 |
with _console.status("[dim]Initializing proxy core...", spinner="dots"):
|
| 106 |
from rotator_library import RotatingClient
|
|
@@ -115,12 +126,15 @@ print(" → Discovering provider plugins...")
|
|
| 115 |
# Provider lazy loading happens during import, so time it here
|
| 116 |
_provider_start = time.time()
|
| 117 |
with _console.status("[dim]Discovering provider plugins...", spinner="dots"):
|
| 118 |
-
from rotator_library import
|
|
|
|
|
|
|
| 119 |
_provider_time = time.time() - _provider_start
|
| 120 |
|
| 121 |
# Get count after import (without timing to avoid double-counting)
|
| 122 |
_plugin_count = len(PROVIDER_PLUGINS)
|
| 123 |
|
|
|
|
| 124 |
# --- Pydantic Models ---
|
| 125 |
class EmbeddingRequest(BaseModel):
|
| 126 |
model: str
|
|
@@ -129,15 +143,19 @@ class EmbeddingRequest(BaseModel):
|
|
| 129 |
dimensions: Optional[int] = None
|
| 130 |
user: Optional[str] = None
|
| 131 |
|
|
|
|
| 132 |
class ModelCard(BaseModel):
|
| 133 |
"""Basic model card for minimal response."""
|
|
|
|
| 134 |
id: str
|
| 135 |
object: str = "model"
|
| 136 |
created: int = Field(default_factory=lambda: int(time.time()))
|
| 137 |
owned_by: str = "Mirro-Proxy"
|
| 138 |
|
|
|
|
| 139 |
class ModelCapabilities(BaseModel):
|
| 140 |
"""Model capability flags."""
|
|
|
|
| 141 |
tool_choice: bool = False
|
| 142 |
function_calling: bool = False
|
| 143 |
reasoning: bool = False
|
|
@@ -146,8 +164,10 @@ class ModelCapabilities(BaseModel):
|
|
| 146 |
prompt_caching: bool = False
|
| 147 |
assistant_prefill: bool = False
|
| 148 |
|
|
|
|
| 149 |
class EnrichedModelCard(BaseModel):
|
| 150 |
"""Extended model card with pricing and capabilities."""
|
|
|
|
| 151 |
id: str
|
| 152 |
object: str = "model"
|
| 153 |
created: int = Field(default_factory=lambda: int(time.time()))
|
|
@@ -169,28 +189,36 @@ class EnrichedModelCard(BaseModel):
|
|
| 169 |
# Debug info (optional)
|
| 170 |
_sources: Optional[List[str]] = None
|
| 171 |
_match_type: Optional[str] = None
|
| 172 |
-
|
| 173 |
class Config:
|
| 174 |
extra = "allow" # Allow extra fields from the service
|
| 175 |
|
|
|
|
| 176 |
class ModelList(BaseModel):
|
| 177 |
"""List of models response."""
|
|
|
|
| 178 |
object: str = "list"
|
| 179 |
data: List[ModelCard]
|
| 180 |
|
|
|
|
| 181 |
class EnrichedModelList(BaseModel):
|
| 182 |
"""List of enriched models with pricing and capabilities."""
|
|
|
|
| 183 |
object: str = "list"
|
| 184 |
data: List[EnrichedModelCard]
|
| 185 |
|
|
|
|
| 186 |
# Calculate total loading time
|
| 187 |
_elapsed = time.time() - _start_time
|
| 188 |
-
print(
|
|
|
|
|
|
|
| 189 |
|
| 190 |
# Clear screen and reprint header for clean startup view
|
| 191 |
# This pushes loading messages up (still in scroll history) but shows a clean final screen
|
| 192 |
import os as _os_module
|
| 193 |
-
|
|
|
|
| 194 |
|
| 195 |
# Reprint header
|
| 196 |
print("━" * 70)
|
|
@@ -198,7 +226,9 @@ print(f"Starting proxy on {args.host}:{args.port}")
|
|
| 198 |
print(f"Proxy API Key: {key_display}")
|
| 199 |
print(f"GitHub: https://github.com/Mirrowel/LLM-API-Key-Proxy")
|
| 200 |
print("━" * 70)
|
| 201 |
-
print(
|
|
|
|
|
|
|
| 202 |
|
| 203 |
|
| 204 |
# Note: Debug logging will be added after logging configuration below
|
|
@@ -211,52 +241,64 @@ LOG_DIR.mkdir(exist_ok=True)
|
|
| 211 |
console_handler = colorlog.StreamHandler(sys.stdout)
|
| 212 |
console_handler.setLevel(logging.INFO)
|
| 213 |
formatter = colorlog.ColoredFormatter(
|
| 214 |
-
|
| 215 |
log_colors={
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
}
|
| 222 |
)
|
| 223 |
console_handler.setFormatter(formatter)
|
| 224 |
|
| 225 |
# Configure a file handler for INFO-level logs and higher
|
| 226 |
info_file_handler = logging.FileHandler(LOG_DIR / "proxy.log", encoding="utf-8")
|
| 227 |
info_file_handler.setLevel(logging.INFO)
|
| 228 |
-
info_file_handler.setFormatter(
|
|
|
|
|
|
|
| 229 |
|
| 230 |
# Configure a dedicated file handler for all DEBUG-level logs
|
| 231 |
debug_file_handler = logging.FileHandler(LOG_DIR / "proxy_debug.log", encoding="utf-8")
|
| 232 |
debug_file_handler.setLevel(logging.DEBUG)
|
| 233 |
-
debug_file_handler.setFormatter(
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
# Create a filter to ensure the debug handler ONLY gets DEBUG messages from the rotator_library
|
| 236 |
class RotatorDebugFilter(logging.Filter):
|
| 237 |
def filter(self, record):
|
| 238 |
-
return record.levelno == logging.DEBUG and record.name.startswith(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
debug_file_handler.addFilter(RotatorDebugFilter())
|
| 240 |
|
| 241 |
# Configure a console handler with color
|
| 242 |
console_handler = colorlog.StreamHandler(sys.stdout)
|
| 243 |
console_handler.setLevel(logging.INFO)
|
| 244 |
formatter = colorlog.ColoredFormatter(
|
| 245 |
-
|
| 246 |
log_colors={
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
}
|
| 253 |
)
|
| 254 |
console_handler.setFormatter(formatter)
|
| 255 |
|
|
|
|
| 256 |
# Add a filter to prevent any LiteLLM logs from cluttering the console
|
| 257 |
class NoLiteLLMLogFilter(logging.Filter):
|
| 258 |
def filter(self, record):
|
| 259 |
-
return not record.name.startswith(
|
|
|
|
|
|
|
| 260 |
console_handler.addFilter(NoLiteLLMLogFilter())
|
| 261 |
|
| 262 |
# Get the root logger and set it to DEBUG to capture all messages
|
|
@@ -306,18 +348,26 @@ ignore_models = {}
|
|
| 306 |
for key, value in os.environ.items():
|
| 307 |
if key.startswith("IGNORE_MODELS_"):
|
| 308 |
provider = key.replace("IGNORE_MODELS_", "").lower()
|
| 309 |
-
models_to_ignore = [
|
|
|
|
|
|
|
| 310 |
ignore_models[provider] = models_to_ignore
|
| 311 |
-
logging.debug(
|
|
|
|
|
|
|
| 312 |
|
| 313 |
# Load model whitelist from environment variables
|
| 314 |
whitelist_models = {}
|
| 315 |
for key, value in os.environ.items():
|
| 316 |
if key.startswith("WHITELIST_MODELS_"):
|
| 317 |
provider = key.replace("WHITELIST_MODELS_", "").lower()
|
| 318 |
-
models_to_whitelist = [
|
|
|
|
|
|
|
| 319 |
whitelist_models[provider] = models_to_whitelist
|
| 320 |
-
logging.debug(
|
|
|
|
|
|
|
| 321 |
|
| 322 |
# Load max concurrent requests per key from environment variables
|
| 323 |
max_concurrent_requests_per_key = {}
|
|
@@ -327,12 +377,19 @@ for key, value in os.environ.items():
|
|
| 327 |
try:
|
| 328 |
max_concurrent = int(value)
|
| 329 |
if max_concurrent < 1:
|
| 330 |
-
logging.warning(
|
|
|
|
|
|
|
| 331 |
max_concurrent = 1
|
| 332 |
max_concurrent_requests_per_key[provider] = max_concurrent
|
| 333 |
-
logging.debug(
|
|
|
|
|
|
|
| 334 |
except ValueError:
|
| 335 |
-
logging.warning(
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
# --- Lifespan Management ---
|
| 338 |
@asynccontextmanager
|
|
@@ -349,11 +406,11 @@ async def lifespan(app: FastAPI):
|
|
| 349 |
if not skip_oauth_init and oauth_credentials:
|
| 350 |
logging.info("Starting OAuth credential validation and deduplication...")
|
| 351 |
processed_emails = {} # email -> {provider: path}
|
| 352 |
-
credentials_to_initialize = {}
|
| 353 |
final_oauth_credentials = {}
|
| 354 |
|
| 355 |
# --- Pass 1: Pre-initialization Scan & Deduplication ---
|
| 356 |
-
#logging.info("Pass 1: Scanning for existing metadata to find duplicates...")
|
| 357 |
for provider, paths in oauth_credentials.items():
|
| 358 |
if provider not in credentials_to_initialize:
|
| 359 |
credentials_to_initialize[provider] = []
|
|
@@ -362,9 +419,9 @@ async def lifespan(app: FastAPI):
|
|
| 362 |
if path.startswith("env://"):
|
| 363 |
credentials_to_initialize[provider].append(path)
|
| 364 |
continue
|
| 365 |
-
|
| 366 |
try:
|
| 367 |
-
with open(path,
|
| 368 |
data = json.load(f)
|
| 369 |
metadata = data.get("_proxy_metadata", {})
|
| 370 |
email = metadata.get("email")
|
|
@@ -372,28 +429,32 @@ async def lifespan(app: FastAPI):
|
|
| 372 |
if email:
|
| 373 |
if email not in processed_emails:
|
| 374 |
processed_emails[email] = {}
|
| 375 |
-
|
| 376 |
if provider in processed_emails[email]:
|
| 377 |
original_path = processed_emails[email][provider]
|
| 378 |
-
logging.warning(
|
|
|
|
|
|
|
| 379 |
continue
|
| 380 |
else:
|
| 381 |
processed_emails[email][provider] = path
|
| 382 |
-
|
| 383 |
credentials_to_initialize[provider].append(path)
|
| 384 |
|
| 385 |
except (FileNotFoundError, json.JSONDecodeError) as e:
|
| 386 |
-
logging.warning(
|
|
|
|
|
|
|
| 387 |
credentials_to_initialize[provider].append(path)
|
| 388 |
-
|
| 389 |
# --- Pass 2: Parallel Initialization of Filtered Credentials ---
|
| 390 |
-
#logging.info("Pass 2: Initializing unique credentials and performing final check...")
|
| 391 |
async def process_credential(provider: str, path: str, provider_instance):
|
| 392 |
"""Process a single credential: initialize and fetch user info."""
|
| 393 |
try:
|
| 394 |
await provider_instance.initialize_token(path)
|
| 395 |
|
| 396 |
-
if not hasattr(provider_instance,
|
| 397 |
return (provider, path, None, None)
|
| 398 |
|
| 399 |
user_info = await provider_instance.get_user_info(path)
|
|
@@ -401,7 +462,9 @@ async def lifespan(app: FastAPI):
|
|
| 401 |
return (provider, path, email, None)
|
| 402 |
|
| 403 |
except Exception as e:
|
| 404 |
-
logging.error(
|
|
|
|
|
|
|
| 405 |
return (provider, path, None, e)
|
| 406 |
|
| 407 |
# Collect all tasks for parallel execution
|
|
@@ -413,9 +476,9 @@ async def lifespan(app: FastAPI):
|
|
| 413 |
provider_plugin_class = PROVIDER_PLUGINS.get(provider)
|
| 414 |
if not provider_plugin_class:
|
| 415 |
continue
|
| 416 |
-
|
| 417 |
provider_instance = provider_plugin_class()
|
| 418 |
-
|
| 419 |
for path in paths:
|
| 420 |
tasks.append(process_credential(provider, path, provider_instance))
|
| 421 |
|
|
@@ -430,7 +493,7 @@ async def lifespan(app: FastAPI):
|
|
| 430 |
continue
|
| 431 |
|
| 432 |
provider, path, email, error = result
|
| 433 |
-
|
| 434 |
# Skip if there was an error
|
| 435 |
if error:
|
| 436 |
continue
|
|
@@ -444,7 +507,9 @@ async def lifespan(app: FastAPI):
|
|
| 444 |
|
| 445 |
# Handle empty email
|
| 446 |
if not email:
|
| 447 |
-
logging.warning(
|
|
|
|
|
|
|
| 448 |
if provider not in final_oauth_credentials:
|
| 449 |
final_oauth_credentials[provider] = []
|
| 450 |
final_oauth_credentials[provider].append(path)
|
|
@@ -453,10 +518,15 @@ async def lifespan(app: FastAPI):
|
|
| 453 |
# Deduplication check
|
| 454 |
if email not in processed_emails:
|
| 455 |
processed_emails[email] = {}
|
| 456 |
-
|
| 457 |
-
if
|
|
|
|
|
|
|
|
|
|
| 458 |
original_path = processed_emails[email][provider]
|
| 459 |
-
logging.warning(
|
|
|
|
|
|
|
| 460 |
continue
|
| 461 |
else:
|
| 462 |
processed_emails[email][provider] = path
|
|
@@ -467,7 +537,7 @@ async def lifespan(app: FastAPI):
|
|
| 467 |
# Update metadata (skip for env-based credentials - they don't have files)
|
| 468 |
if not path.startswith("env://"):
|
| 469 |
try:
|
| 470 |
-
with open(path,
|
| 471 |
data = json.load(f)
|
| 472 |
metadata = data.get("_proxy_metadata", {})
|
| 473 |
metadata["email"] = email
|
|
@@ -490,15 +560,15 @@ async def lifespan(app: FastAPI):
|
|
| 490 |
# The client now uses the root logger configuration
|
| 491 |
client = RotatingClient(
|
| 492 |
api_keys=api_keys,
|
| 493 |
-
oauth_credentials=oauth_credentials,
|
| 494 |
configure_logging=True,
|
| 495 |
litellm_provider_params=litellm_provider_params,
|
| 496 |
ignore_models=ignore_models,
|
| 497 |
whitelist_models=whitelist_models,
|
| 498 |
enable_request_logging=ENABLE_REQUEST_LOGGING,
|
| 499 |
-
max_concurrent_requests_per_key=max_concurrent_requests_per_key
|
| 500 |
)
|
| 501 |
-
|
| 502 |
# Log loaded credentials summary (compact, always visible for deployment verification)
|
| 503 |
#_api_summary = ', '.join([f"{p}:{len(c)}" for p, c in api_keys.items()]) if api_keys else "none"
|
| 504 |
#_oauth_summary = ', '.join([f"{p}:{len(c)}" for p, c in oauth_credentials.items()]) if oauth_credentials else "none"
|
|
@@ -506,17 +576,19 @@ async def lifespan(app: FastAPI):
|
|
| 506 |
#print(f"🔑 Credentials loaded: {_total_summary} (API: {_api_summary} | OAuth: {_oauth_summary})")
|
| 507 |
client.background_refresher.start() # Start the background task
|
| 508 |
app.state.rotating_client = client
|
| 509 |
-
|
| 510 |
# Warn if no provider credentials are configured
|
| 511 |
if not client.all_credentials:
|
| 512 |
logging.warning("=" * 70)
|
| 513 |
logging.warning("⚠️ NO PROVIDER CREDENTIALS CONFIGURED")
|
| 514 |
logging.warning("The proxy is running but cannot serve any LLM requests.")
|
| 515 |
-
logging.warning(
|
|
|
|
|
|
|
| 516 |
logging.warning(" • Executable: Run with --add-credential flag")
|
| 517 |
logging.warning(" • Source: python src/proxy_app/main.py --add-credential")
|
| 518 |
logging.warning("=" * 70)
|
| 519 |
-
|
| 520 |
os.environ["LITELLM_LOG"] = "ERROR"
|
| 521 |
litellm.set_verbose = False
|
| 522 |
litellm.drop_params = True
|
|
@@ -527,29 +599,30 @@ async def lifespan(app: FastAPI):
|
|
| 527 |
else:
|
| 528 |
app.state.embedding_batcher = None
|
| 529 |
logging.info("RotatingClient initialized (EmbeddingBatcher disabled).")
|
| 530 |
-
|
| 531 |
# Start model info service in background (fetches pricing/capabilities data)
|
| 532 |
# This runs asynchronously and doesn't block proxy startup
|
| 533 |
model_info_service = await init_model_info_service()
|
| 534 |
app.state.model_info_service = model_info_service
|
| 535 |
logging.info("Model info service started (fetching pricing data in background).")
|
| 536 |
-
|
| 537 |
yield
|
| 538 |
-
|
| 539 |
-
await client.background_refresher.stop()
|
| 540 |
if app.state.embedding_batcher:
|
| 541 |
await app.state.embedding_batcher.stop()
|
| 542 |
await client.close()
|
| 543 |
-
|
| 544 |
# Stop model info service
|
| 545 |
-
if hasattr(app.state,
|
| 546 |
await app.state.model_info_service.stop()
|
| 547 |
-
|
| 548 |
if app.state.embedding_batcher:
|
| 549 |
logging.info("RotatingClient and EmbeddingBatcher closed.")
|
| 550 |
else:
|
| 551 |
logging.info("RotatingClient closed.")
|
| 552 |
|
|
|
|
| 553 |
# --- FastAPI App Setup ---
|
| 554 |
app = FastAPI(lifespan=lifespan)
|
| 555 |
|
|
@@ -563,25 +636,32 @@ app.add_middleware(
|
|
| 563 |
)
|
| 564 |
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
|
| 565 |
|
|
|
|
| 566 |
def get_rotating_client(request: Request) -> RotatingClient:
|
| 567 |
"""Dependency to get the rotating client instance from the app state."""
|
| 568 |
return request.app.state.rotating_client
|
| 569 |
|
|
|
|
| 570 |
def get_embedding_batcher(request: Request) -> EmbeddingBatcher:
|
| 571 |
"""Dependency to get the embedding batcher instance from the app state."""
|
| 572 |
return request.app.state.embedding_batcher
|
| 573 |
|
|
|
|
| 574 |
async def verify_api_key(auth: str = Depends(api_key_header)):
|
| 575 |
"""Dependency to verify the proxy API key."""
|
|
|
|
|
|
|
|
|
|
| 576 |
if not auth or auth != f"Bearer {PROXY_API_KEY}":
|
| 577 |
raise HTTPException(status_code=401, detail="Invalid or missing API Key")
|
| 578 |
return auth
|
| 579 |
|
|
|
|
| 580 |
async def streaming_response_wrapper(
|
| 581 |
request: Request,
|
| 582 |
request_data: dict,
|
| 583 |
response_stream: AsyncGenerator[str, None],
|
| 584 |
-
logger: Optional[DetailedLogger] = None
|
| 585 |
) -> AsyncGenerator[str, None]:
|
| 586 |
"""
|
| 587 |
Wraps a streaming response to log the full response after completion
|
|
@@ -589,7 +669,7 @@ async def streaming_response_wrapper(
|
|
| 589 |
"""
|
| 590 |
response_chunks = []
|
| 591 |
full_response = {}
|
| 592 |
-
|
| 593 |
try:
|
| 594 |
async for chunk_str in response_stream:
|
| 595 |
if await request.is_disconnected():
|
|
@@ -597,7 +677,7 @@ async def streaming_response_wrapper(
|
|
| 597 |
break
|
| 598 |
yield chunk_str
|
| 599 |
if chunk_str.strip() and chunk_str.startswith("data:"):
|
| 600 |
-
content = chunk_str[len("data:"):].strip()
|
| 601 |
if content != "[DONE]":
|
| 602 |
try:
|
| 603 |
chunk_data = json.loads(content)
|
|
@@ -613,15 +693,17 @@ async def streaming_response_wrapper(
|
|
| 613 |
"error": {
|
| 614 |
"message": f"An unexpected error occurred during the stream: {str(e)}",
|
| 615 |
"type": "proxy_internal_error",
|
| 616 |
-
"code": 500
|
| 617 |
}
|
| 618 |
}
|
| 619 |
yield f"data: {json.dumps(error_payload)}\n\n"
|
| 620 |
yield "data: [DONE]\n\n"
|
| 621 |
# Also log this as a failed request
|
| 622 |
if logger:
|
| 623 |
-
logger.log_final_response(
|
| 624 |
-
|
|
|
|
|
|
|
| 625 |
finally:
|
| 626 |
if response_chunks:
|
| 627 |
# --- Aggregation Logic ---
|
|
@@ -645,36 +727,56 @@ async def streaming_response_wrapper(
|
|
| 645 |
final_message["content"] = ""
|
| 646 |
if value:
|
| 647 |
final_message["content"] += value
|
| 648 |
-
|
| 649 |
elif key == "tool_calls":
|
| 650 |
for tc_chunk in value:
|
| 651 |
index = tc_chunk["index"]
|
| 652 |
if index not in aggregated_tool_calls:
|
| 653 |
-
aggregated_tool_calls[index] = {
|
|
|
|
|
|
|
|
|
|
| 654 |
# Ensure 'function' key exists for this index before accessing its sub-keys
|
| 655 |
if "function" not in aggregated_tool_calls[index]:
|
| 656 |
-
aggregated_tool_calls[index]["function"] = {
|
|
|
|
|
|
|
|
|
|
| 657 |
if tc_chunk.get("id"):
|
| 658 |
aggregated_tool_calls[index]["id"] = tc_chunk["id"]
|
| 659 |
if "function" in tc_chunk:
|
| 660 |
if "name" in tc_chunk["function"]:
|
| 661 |
if tc_chunk["function"]["name"] is not None:
|
| 662 |
-
aggregated_tool_calls[index]["function"][
|
|
|
|
|
|
|
| 663 |
if "arguments" in tc_chunk["function"]:
|
| 664 |
-
if
|
| 665 |
-
|
| 666 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
elif key == "function_call":
|
| 668 |
if "function_call" not in final_message:
|
| 669 |
-
final_message["function_call"] = {
|
|
|
|
|
|
|
|
|
|
| 670 |
if "name" in value:
|
| 671 |
if value["name"] is not None:
|
| 672 |
-
final_message["function_call"]["name"] += value[
|
|
|
|
|
|
|
| 673 |
if "arguments" in value:
|
| 674 |
if value["arguments"] is not None:
|
| 675 |
-
final_message["function_call"]["arguments"] +=
|
| 676 |
-
|
| 677 |
-
|
|
|
|
|
|
|
| 678 |
# FIX: Role should always replace, never concatenate
|
| 679 |
if key == "role":
|
| 680 |
final_message[key] = value
|
|
@@ -707,7 +809,7 @@ async def streaming_response_wrapper(
|
|
| 707 |
final_choice = {
|
| 708 |
"index": 0,
|
| 709 |
"message": final_message,
|
| 710 |
-
"finish_reason": finish_reason
|
| 711 |
}
|
| 712 |
|
| 713 |
full_response = {
|
|
@@ -716,21 +818,22 @@ async def streaming_response_wrapper(
|
|
| 716 |
"created": first_chunk.get("created"),
|
| 717 |
"model": first_chunk.get("model"),
|
| 718 |
"choices": [final_choice],
|
| 719 |
-
"usage": usage_data
|
| 720 |
}
|
| 721 |
|
| 722 |
if logger:
|
| 723 |
logger.log_final_response(
|
| 724 |
status_code=200,
|
| 725 |
headers=None, # Headers are not available at this stage
|
| 726 |
-
body=full_response
|
| 727 |
)
|
| 728 |
|
|
|
|
| 729 |
@app.post("/v1/chat/completions")
|
| 730 |
async def chat_completions(
|
| 731 |
request: Request,
|
| 732 |
client: RotatingClient = Depends(get_rotating_client),
|
| 733 |
-
_
|
| 734 |
):
|
| 735 |
"""
|
| 736 |
OpenAI-compatible endpoint powered by the RotatingClient.
|
|
@@ -749,16 +852,24 @@ async def chat_completions(
|
|
| 749 |
# instead of actual schemas, which can cause tool hallucination
|
| 750 |
# Modes: "remove" = delete temperature key, "set" = change to 1.0, "false" = disabled
|
| 751 |
override_temp_zero = os.getenv("OVERRIDE_TEMPERATURE_ZERO", "false").lower()
|
| 752 |
-
|
| 753 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 754 |
if override_temp_zero == "remove":
|
| 755 |
# Remove temperature key entirely
|
| 756 |
del request_data["temperature"]
|
| 757 |
-
logging.debug(
|
|
|
|
|
|
|
| 758 |
else:
|
| 759 |
# Set to 1.0 (for "set", "true", "1", "yes")
|
| 760 |
request_data["temperature"] = 1.0
|
| 761 |
-
logging.debug(
|
|
|
|
|
|
|
| 762 |
|
| 763 |
# If logging is enabled, perform all logging operations using the parsed data.
|
| 764 |
if logger:
|
|
@@ -766,9 +877,17 @@ async def chat_completions(
|
|
| 766 |
|
| 767 |
# Extract and log specific reasoning parameters for monitoring.
|
| 768 |
model = request_data.get("model")
|
| 769 |
-
generation_cfg =
|
| 770 |
-
|
| 771 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 772 |
|
| 773 |
logging.getLogger("rotator_library").debug(
|
| 774 |
f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
|
|
@@ -779,31 +898,41 @@ async def chat_completions(
|
|
| 779 |
url=str(request.url),
|
| 780 |
headers=dict(request.headers),
|
| 781 |
client_info=(request.client.host, request.client.port),
|
| 782 |
-
request_data=request_data
|
| 783 |
)
|
| 784 |
is_streaming = request_data.get("stream", False)
|
| 785 |
|
| 786 |
if is_streaming:
|
| 787 |
response_generator = client.acompletion(request=request, **request_data)
|
| 788 |
return StreamingResponse(
|
| 789 |
-
streaming_response_wrapper(
|
| 790 |
-
|
|
|
|
|
|
|
| 791 |
)
|
| 792 |
else:
|
| 793 |
response = await client.acompletion(request=request, **request_data)
|
| 794 |
if logger:
|
| 795 |
# Assuming response has status_code and headers attributes
|
| 796 |
# This might need adjustment based on the actual response object
|
| 797 |
-
response_headers =
|
| 798 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 799 |
logger.log_final_response(
|
| 800 |
status_code=status_code,
|
| 801 |
headers=response_headers,
|
| 802 |
-
body=response.model_dump()
|
| 803 |
)
|
| 804 |
return response
|
| 805 |
|
| 806 |
-
except (
|
|
|
|
|
|
|
|
|
|
|
|
|
| 807 |
raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
|
| 808 |
except litellm.AuthenticationError as e:
|
| 809 |
raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
|
|
@@ -824,16 +953,19 @@ async def chat_completions(
|
|
| 824 |
except json.JSONDecodeError:
|
| 825 |
request_data = {"error": "Could not parse request body"}
|
| 826 |
if logger:
|
| 827 |
-
logger.log_final_response(
|
|
|
|
|
|
|
| 828 |
raise HTTPException(status_code=500, detail=str(e))
|
| 829 |
|
|
|
|
| 830 |
@app.post("/v1/embeddings")
|
| 831 |
async def embeddings(
|
| 832 |
request: Request,
|
| 833 |
body: EmbeddingRequest,
|
| 834 |
client: RotatingClient = Depends(get_rotating_client),
|
| 835 |
batcher: Optional[EmbeddingBatcher] = Depends(get_embedding_batcher),
|
| 836 |
-
_
|
| 837 |
):
|
| 838 |
"""
|
| 839 |
OpenAI-compatible endpoint for creating embeddings.
|
|
@@ -847,7 +979,7 @@ async def embeddings(
|
|
| 847 |
url=str(request.url),
|
| 848 |
headers=dict(request.headers),
|
| 849 |
client_info=(request.client.host, request.client.port),
|
| 850 |
-
request_data=request_data
|
| 851 |
)
|
| 852 |
if USE_EMBEDDING_BATCHER and batcher:
|
| 853 |
# --- Server-Side Batching Logic ---
|
|
@@ -861,7 +993,7 @@ async def embeddings(
|
|
| 861 |
individual_request = request_data.copy()
|
| 862 |
individual_request["input"] = single_input
|
| 863 |
tasks.append(batcher.add_request(individual_request))
|
| 864 |
-
|
| 865 |
results = await asyncio.gather(*tasks)
|
| 866 |
|
| 867 |
all_data = []
|
|
@@ -877,16 +1009,19 @@ async def embeddings(
|
|
| 877 |
"object": "list",
|
| 878 |
"model": results[0]["model"],
|
| 879 |
"data": all_data,
|
| 880 |
-
"usage": {
|
|
|
|
|
|
|
|
|
|
| 881 |
}
|
| 882 |
response = litellm.EmbeddingResponse(**final_response_data)
|
| 883 |
-
|
| 884 |
else:
|
| 885 |
# --- Direct Pass-Through Logic ---
|
| 886 |
request_data = body.model_dump(exclude_none=True)
|
| 887 |
if isinstance(request_data.get("input"), str):
|
| 888 |
request_data["input"] = [request_data["input"]]
|
| 889 |
-
|
| 890 |
response = await client.aembedding(request=request, **request_data)
|
| 891 |
|
| 892 |
return response
|
|
@@ -894,7 +1029,11 @@ async def embeddings(
|
|
| 894 |
except HTTPException as e:
|
| 895 |
# Re-raise HTTPException to ensure it's not caught by the generic Exception handler
|
| 896 |
raise e
|
| 897 |
-
except (
|
|
|
|
|
|
|
|
|
|
|
|
|
| 898 |
raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
|
| 899 |
except litellm.AuthenticationError as e:
|
| 900 |
raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
|
|
@@ -910,10 +1049,12 @@ async def embeddings(
|
|
| 910 |
logging.error(f"Embedding request failed: {e}")
|
| 911 |
raise HTTPException(status_code=500, detail=str(e))
|
| 912 |
|
|
|
|
| 913 |
@app.get("/")
|
| 914 |
def read_root():
|
| 915 |
return {"Status": "API Key Proxy is running"}
|
| 916 |
|
|
|
|
| 917 |
@app.get("/v1/models")
|
| 918 |
async def list_models(
|
| 919 |
request: Request,
|
|
@@ -923,22 +1064,30 @@ async def list_models(
|
|
| 923 |
):
|
| 924 |
"""
|
| 925 |
Returns a list of available models in the OpenAI-compatible format.
|
| 926 |
-
|
| 927 |
Query Parameters:
|
| 928 |
enriched: If True (default), returns detailed model info with pricing and capabilities.
|
| 929 |
If False, returns minimal OpenAI-compatible response.
|
| 930 |
"""
|
| 931 |
model_ids = await client.get_all_available_models(grouped=False)
|
| 932 |
-
|
| 933 |
-
if enriched and hasattr(request.app.state,
|
| 934 |
model_info_service = request.app.state.model_info_service
|
| 935 |
if model_info_service.is_ready:
|
| 936 |
# Return enriched model data
|
| 937 |
enriched_data = model_info_service.enrich_model_list(model_ids)
|
| 938 |
return {"object": "list", "data": enriched_data}
|
| 939 |
-
|
| 940 |
# Fallback to basic model cards
|
| 941 |
-
model_cards = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
return {"object": "list", "data": model_cards}
|
| 943 |
|
| 944 |
|
|
@@ -950,17 +1099,17 @@ async def get_model(
|
|
| 950 |
):
|
| 951 |
"""
|
| 952 |
Returns detailed information about a specific model.
|
| 953 |
-
|
| 954 |
Path Parameters:
|
| 955 |
model_id: The model ID (e.g., "anthropic/claude-3-opus", "openrouter/openai/gpt-4")
|
| 956 |
"""
|
| 957 |
-
if hasattr(request.app.state,
|
| 958 |
model_info_service = request.app.state.model_info_service
|
| 959 |
if model_info_service.is_ready:
|
| 960 |
info = model_info_service.get_model_info(model_id)
|
| 961 |
if info:
|
| 962 |
return info.to_dict()
|
| 963 |
-
|
| 964 |
# Return basic info if service not ready or model not found
|
| 965 |
return {
|
| 966 |
"id": model_id,
|
|
@@ -978,7 +1127,7 @@ async def model_info_stats(
|
|
| 978 |
"""
|
| 979 |
Returns statistics about the model info service (for monitoring/debugging).
|
| 980 |
"""
|
| 981 |
-
if hasattr(request.app.state,
|
| 982 |
return request.app.state.model_info_service.get_stats()
|
| 983 |
return {"error": "Model info service not initialized"}
|
| 984 |
|
|
@@ -990,11 +1139,12 @@ async def list_providers(_=Depends(verify_api_key)):
|
|
| 990 |
"""
|
| 991 |
return list(PROVIDER_PLUGINS.keys())
|
| 992 |
|
|
|
|
| 993 |
@app.post("/v1/token-count")
|
| 994 |
async def token_count(
|
| 995 |
-
request: Request,
|
| 996 |
client: RotatingClient = Depends(get_rotating_client),
|
| 997 |
-
_=Depends(verify_api_key)
|
| 998 |
):
|
| 999 |
"""
|
| 1000 |
Calculates the token count for a given list of messages and a model.
|
|
@@ -1005,7 +1155,9 @@ async def token_count(
|
|
| 1005 |
messages = data.get("messages")
|
| 1006 |
|
| 1007 |
if not model or not messages:
|
| 1008 |
-
raise HTTPException(
|
|
|
|
|
|
|
| 1009 |
|
| 1010 |
count = client.token_count(**data)
|
| 1011 |
return {"token_count": count}
|
|
@@ -1016,13 +1168,10 @@ async def token_count(
|
|
| 1016 |
|
| 1017 |
|
| 1018 |
@app.post("/v1/cost-estimate")
|
| 1019 |
-
async def cost_estimate(
|
| 1020 |
-
request: Request,
|
| 1021 |
-
_=Depends(verify_api_key)
|
| 1022 |
-
):
|
| 1023 |
"""
|
| 1024 |
Estimates the cost for a request based on token counts and model pricing.
|
| 1025 |
-
|
| 1026 |
Request body:
|
| 1027 |
{
|
| 1028 |
"model": "anthropic/claude-3-opus",
|
|
@@ -1031,7 +1180,7 @@ async def cost_estimate(
|
|
| 1031 |
"cache_read_tokens": 0, # optional
|
| 1032 |
"cache_creation_tokens": 0 # optional
|
| 1033 |
}
|
| 1034 |
-
|
| 1035 |
Returns:
|
| 1036 |
{
|
| 1037 |
"model": "anthropic/claude-3-opus",
|
|
@@ -1051,25 +1200,28 @@ async def cost_estimate(
|
|
| 1051 |
completion_tokens = data.get("completion_tokens", 0)
|
| 1052 |
cache_read_tokens = data.get("cache_read_tokens", 0)
|
| 1053 |
cache_creation_tokens = data.get("cache_creation_tokens", 0)
|
| 1054 |
-
|
| 1055 |
if not model:
|
| 1056 |
raise HTTPException(status_code=400, detail="'model' is required.")
|
| 1057 |
-
|
| 1058 |
result = {
|
| 1059 |
"model": model,
|
| 1060 |
"cost": None,
|
| 1061 |
"currency": "USD",
|
| 1062 |
"pricing": {},
|
| 1063 |
-
"source": None
|
| 1064 |
}
|
| 1065 |
-
|
| 1066 |
# Try model info service first
|
| 1067 |
-
if hasattr(request.app.state,
|
| 1068 |
model_info_service = request.app.state.model_info_service
|
| 1069 |
if model_info_service.is_ready:
|
| 1070 |
cost = model_info_service.calculate_cost(
|
| 1071 |
-
model,
|
| 1072 |
-
|
|
|
|
|
|
|
|
|
|
| 1073 |
)
|
| 1074 |
if cost is not None:
|
| 1075 |
cost_info = model_info_service.get_cost_info(model)
|
|
@@ -1077,31 +1229,32 @@ async def cost_estimate(
|
|
| 1077 |
result["pricing"] = cost_info or {}
|
| 1078 |
result["source"] = "model_info_service"
|
| 1079 |
return result
|
| 1080 |
-
|
| 1081 |
# Fallback to litellm
|
| 1082 |
try:
|
| 1083 |
import litellm
|
|
|
|
| 1084 |
# Create a mock response for cost calculation
|
| 1085 |
model_info = litellm.get_model_info(model)
|
| 1086 |
input_cost = model_info.get("input_cost_per_token", 0)
|
| 1087 |
output_cost = model_info.get("output_cost_per_token", 0)
|
| 1088 |
-
|
| 1089 |
if input_cost or output_cost:
|
| 1090 |
cost = (prompt_tokens * input_cost) + (completion_tokens * output_cost)
|
| 1091 |
result["cost"] = cost
|
| 1092 |
result["pricing"] = {
|
| 1093 |
"input_cost_per_token": input_cost,
|
| 1094 |
-
"output_cost_per_token": output_cost
|
| 1095 |
}
|
| 1096 |
result["source"] = "litellm_fallback"
|
| 1097 |
return result
|
| 1098 |
except Exception:
|
| 1099 |
pass
|
| 1100 |
-
|
| 1101 |
result["source"] = "unknown"
|
| 1102 |
result["error"] = "Pricing data not available for this model"
|
| 1103 |
return result
|
| 1104 |
-
|
| 1105 |
except HTTPException:
|
| 1106 |
raise
|
| 1107 |
except Exception as e:
|
|
@@ -1112,17 +1265,18 @@ async def cost_estimate(
|
|
| 1112 |
if __name__ == "__main__":
|
| 1113 |
# Define ENV_FILE for onboarding checks
|
| 1114 |
ENV_FILE = Path.cwd() / ".env"
|
| 1115 |
-
|
| 1116 |
# Check if launcher TUI should be shown (no arguments provided)
|
| 1117 |
if len(sys.argv) == 1:
|
| 1118 |
# No arguments - show launcher TUI (lazy import)
|
| 1119 |
from proxy_app.launcher_tui import run_launcher_tui
|
|
|
|
| 1120 |
run_launcher_tui()
|
| 1121 |
# Launcher modifies sys.argv and returns, or exits if user chose Exit
|
| 1122 |
# If we get here, user chose "Run Proxy" and sys.argv is modified
|
| 1123 |
# Re-parse arguments with modified sys.argv
|
| 1124 |
args = parser.parse_args()
|
| 1125 |
-
|
| 1126 |
def needs_onboarding() -> bool:
|
| 1127 |
"""
|
| 1128 |
Check if the proxy needs onboarding (first-time setup).
|
|
@@ -1132,40 +1286,49 @@ if __name__ == "__main__":
|
|
| 1132 |
# PROXY_API_KEY is optional (will show warning if not set)
|
| 1133 |
if not ENV_FILE.is_file():
|
| 1134 |
return True
|
| 1135 |
-
|
| 1136 |
return False
|
| 1137 |
|
| 1138 |
def show_onboarding_message():
|
| 1139 |
"""Display clear explanatory message for why onboarding is needed."""
|
| 1140 |
-
os.system(
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1145 |
console.print("[bold yellow]⚠️ Configuration Required[/bold yellow]\n")
|
| 1146 |
-
|
| 1147 |
console.print("The proxy needs initial configuration:")
|
| 1148 |
console.print(" [red]❌ No .env file found[/red]")
|
| 1149 |
-
|
| 1150 |
console.print("\n[bold]Why this matters:[/bold]")
|
| 1151 |
console.print(" • The .env file stores your credentials and settings")
|
| 1152 |
console.print(" • PROXY_API_KEY protects your proxy from unauthorized access")
|
| 1153 |
console.print(" • Provider API keys enable LLM access")
|
| 1154 |
-
|
| 1155 |
console.print("\n[bold]What happens next:[/bold]")
|
| 1156 |
console.print(" 1. We'll create a .env file with PROXY_API_KEY")
|
| 1157 |
console.print(" 2. You can add LLM provider credentials (API keys or OAuth)")
|
| 1158 |
console.print(" 3. The proxy will then start normally")
|
| 1159 |
-
|
| 1160 |
-
console.print(
|
|
|
|
|
|
|
| 1161 |
console.print(" You can remove it later if you want an unsecured proxy.\n")
|
| 1162 |
-
|
| 1163 |
-
console.input(
|
|
|
|
|
|
|
| 1164 |
|
| 1165 |
# Check if user explicitly wants to add credentials
|
| 1166 |
if args.add_credential:
|
| 1167 |
# Import and call ensure_env_defaults to create .env and PROXY_API_KEY if needed
|
| 1168 |
from rotator_library.credential_tool import ensure_env_defaults
|
|
|
|
| 1169 |
ensure_env_defaults()
|
| 1170 |
# Reload environment variables after ensure_env_defaults creates/updates .env
|
| 1171 |
load_dotenv(override=True)
|
|
@@ -1176,36 +1339,35 @@ if __name__ == "__main__":
|
|
| 1176 |
# Import console from rich for better messaging
|
| 1177 |
from rich.console import Console
|
| 1178 |
from rich.panel import Panel
|
|
|
|
| 1179 |
console = Console()
|
| 1180 |
-
|
| 1181 |
# Show clear explanatory message
|
| 1182 |
show_onboarding_message()
|
| 1183 |
-
|
| 1184 |
# Launch credential tool automatically
|
| 1185 |
from rotator_library.credential_tool import ensure_env_defaults
|
|
|
|
| 1186 |
ensure_env_defaults()
|
| 1187 |
load_dotenv(override=True)
|
| 1188 |
run_credential_tool()
|
| 1189 |
-
|
| 1190 |
# After credential tool exits, reload and re-check
|
| 1191 |
load_dotenv(override=True)
|
| 1192 |
# Re-read PROXY_API_KEY from environment
|
| 1193 |
PROXY_API_KEY = os.getenv("PROXY_API_KEY")
|
| 1194 |
-
|
| 1195 |
# Verify onboarding is complete
|
| 1196 |
if needs_onboarding():
|
| 1197 |
console.print("\n[bold red]❌ Configuration incomplete.[/bold red]")
|
| 1198 |
-
console.print(
|
|
|
|
|
|
|
| 1199 |
sys.exit(1)
|
| 1200 |
else:
|
| 1201 |
console.print("\n[bold green]✅ Configuration complete![/bold green]")
|
| 1202 |
console.print("\nStarting proxy server...\n")
|
| 1203 |
-
|
| 1204 |
-
# Validate PROXY_API_KEY before starting the server
|
| 1205 |
-
if not PROXY_API_KEY:
|
| 1206 |
-
raise ValueError("PROXY_API_KEY environment variable not set. Please run with --add-credential to set up your environment.")
|
| 1207 |
-
|
| 1208 |
-
import uvicorn
|
| 1209 |
-
uvicorn.run(app, host=args.host, port=args.port)
|
| 1210 |
|
|
|
|
| 1211 |
|
|
|
|
|
|
| 10 |
|
| 11 |
# --- Argument Parsing (BEFORE heavy imports) ---
|
| 12 |
parser = argparse.ArgumentParser(description="API Key Proxy Server")
|
| 13 |
+
parser.add_argument(
|
| 14 |
+
"--host", type=str, default="0.0.0.0", help="Host to bind the server to."
|
| 15 |
+
)
|
| 16 |
parser.add_argument("--port", type=int, default=8000, help="Port to run the server on.")
|
| 17 |
+
parser.add_argument(
|
| 18 |
+
"--enable-request-logging", action="store_true", help="Enable request logging."
|
| 19 |
+
)
|
| 20 |
+
parser.add_argument(
|
| 21 |
+
"--add-credential",
|
| 22 |
+
action="store_true",
|
| 23 |
+
help="Launch the interactive tool to add a new OAuth credential.",
|
| 24 |
+
)
|
| 25 |
args, _ = parser.parse_known_args()
|
| 26 |
|
| 27 |
# Add the 'src' directory to the Python path
|
|
|
|
| 31 |
if len(sys.argv) == 1:
|
| 32 |
# TUI MODE - Load ONLY what's needed for the launcher (fast path!)
|
| 33 |
from proxy_app.launcher_tui import run_launcher_tui
|
| 34 |
+
|
| 35 |
run_launcher_tui()
|
| 36 |
# Launcher modifies sys.argv and returns, or exits if user chose Exit
|
| 37 |
# If we get here, user chose "Run Proxy" and sys.argv is modified
|
|
|
|
| 41 |
# Check if credential tool mode (also doesn't need heavy proxy imports)
|
| 42 |
if args.add_credential:
|
| 43 |
from rotator_library.credential_tool import run_credential_tool
|
| 44 |
+
|
| 45 |
run_credential_tool()
|
| 46 |
sys.exit(0)
|
| 47 |
|
|
|
|
| 84 |
|
| 85 |
# Phase 2: Load Rich for loading spinner (lightweight)
|
| 86 |
from rich.console import Console
|
| 87 |
+
|
| 88 |
_console = Console()
|
| 89 |
|
| 90 |
# Phase 3: Heavy dependencies with granular loading messages
|
|
|
|
| 103 |
import json
|
| 104 |
from typing import AsyncGenerator, Any, List, Optional, Union
|
| 105 |
from pydantic import BaseModel, Field
|
| 106 |
+
|
| 107 |
# --- Early Log Level Configuration ---
|
| 108 |
logging.getLogger("LiteLLM").setLevel(logging.WARNING)
|
| 109 |
|
|
|
|
| 111 |
with _console.status("[dim]Loading LiteLLM library...", spinner="dots"):
|
| 112 |
import litellm
|
| 113 |
|
| 114 |
+
# Phase 4: Application imports with granular loading messages
|
| 115 |
print(" → Initializing proxy core...")
|
| 116 |
with _console.status("[dim]Initializing proxy core...", spinner="dots"):
|
| 117 |
from rotator_library import RotatingClient
|
|
|
|
| 126 |
# Provider lazy loading happens during import, so time it here
|
| 127 |
_provider_start = time.time()
|
| 128 |
with _console.status("[dim]Discovering provider plugins...", spinner="dots"):
|
| 129 |
+
from rotator_library import (
|
| 130 |
+
PROVIDER_PLUGINS,
|
| 131 |
+
) # This triggers lazy load via __getattr__
|
| 132 |
_provider_time = time.time() - _provider_start
|
| 133 |
|
| 134 |
# Get count after import (without timing to avoid double-counting)
|
| 135 |
_plugin_count = len(PROVIDER_PLUGINS)
|
| 136 |
|
| 137 |
+
|
| 138 |
# --- Pydantic Models ---
|
| 139 |
class EmbeddingRequest(BaseModel):
|
| 140 |
model: str
|
|
|
|
| 143 |
dimensions: Optional[int] = None
|
| 144 |
user: Optional[str] = None
|
| 145 |
|
| 146 |
+
|
| 147 |
class ModelCard(BaseModel):
|
| 148 |
"""Basic model card for minimal response."""
|
| 149 |
+
|
| 150 |
id: str
|
| 151 |
object: str = "model"
|
| 152 |
created: int = Field(default_factory=lambda: int(time.time()))
|
| 153 |
owned_by: str = "Mirro-Proxy"
|
| 154 |
|
| 155 |
+
|
| 156 |
class ModelCapabilities(BaseModel):
|
| 157 |
"""Model capability flags."""
|
| 158 |
+
|
| 159 |
tool_choice: bool = False
|
| 160 |
function_calling: bool = False
|
| 161 |
reasoning: bool = False
|
|
|
|
| 164 |
prompt_caching: bool = False
|
| 165 |
assistant_prefill: bool = False
|
| 166 |
|
| 167 |
+
|
| 168 |
class EnrichedModelCard(BaseModel):
|
| 169 |
"""Extended model card with pricing and capabilities."""
|
| 170 |
+
|
| 171 |
id: str
|
| 172 |
object: str = "model"
|
| 173 |
created: int = Field(default_factory=lambda: int(time.time()))
|
|
|
|
| 189 |
# Debug info (optional)
|
| 190 |
_sources: Optional[List[str]] = None
|
| 191 |
_match_type: Optional[str] = None
|
| 192 |
+
|
| 193 |
class Config:
|
| 194 |
extra = "allow" # Allow extra fields from the service
|
| 195 |
|
| 196 |
+
|
| 197 |
class ModelList(BaseModel):
|
| 198 |
"""List of models response."""
|
| 199 |
+
|
| 200 |
object: str = "list"
|
| 201 |
data: List[ModelCard]
|
| 202 |
|
| 203 |
+
|
| 204 |
class EnrichedModelList(BaseModel):
|
| 205 |
"""List of enriched models with pricing and capabilities."""
|
| 206 |
+
|
| 207 |
object: str = "list"
|
| 208 |
data: List[EnrichedModelCard]
|
| 209 |
|
| 210 |
+
|
| 211 |
# Calculate total loading time
|
| 212 |
_elapsed = time.time() - _start_time
|
| 213 |
+
print(
|
| 214 |
+
f"✓ Server ready in {_elapsed:.2f}s ({_plugin_count} providers discovered in {_provider_time:.2f}s)"
|
| 215 |
+
)
|
| 216 |
|
| 217 |
# Clear screen and reprint header for clean startup view
|
| 218 |
# This pushes loading messages up (still in scroll history) but shows a clean final screen
|
| 219 |
import os as _os_module
|
| 220 |
+
|
| 221 |
+
_os_module.system("cls" if _os_module.name == "nt" else "clear")
|
| 222 |
|
| 223 |
# Reprint header
|
| 224 |
print("━" * 70)
|
|
|
|
| 226 |
print(f"Proxy API Key: {key_display}")
|
| 227 |
print(f"GitHub: https://github.com/Mirrowel/LLM-API-Key-Proxy")
|
| 228 |
print("━" * 70)
|
| 229 |
+
print(
|
| 230 |
+
f"✓ Server ready in {_elapsed:.2f}s ({_plugin_count} providers discovered in {_provider_time:.2f}s)"
|
| 231 |
+
)
|
| 232 |
|
| 233 |
|
| 234 |
# Note: Debug logging will be added after logging configuration below
|
|
|
|
| 241 |
console_handler = colorlog.StreamHandler(sys.stdout)
|
| 242 |
console_handler.setLevel(logging.INFO)
|
| 243 |
formatter = colorlog.ColoredFormatter(
|
| 244 |
+
"%(log_color)s%(message)s",
|
| 245 |
log_colors={
|
| 246 |
+
"DEBUG": "cyan",
|
| 247 |
+
"INFO": "green",
|
| 248 |
+
"WARNING": "yellow",
|
| 249 |
+
"ERROR": "red",
|
| 250 |
+
"CRITICAL": "red,bg_white",
|
| 251 |
+
},
|
| 252 |
)
|
| 253 |
console_handler.setFormatter(formatter)
|
| 254 |
|
| 255 |
# Configure a file handler for INFO-level logs and higher
|
| 256 |
info_file_handler = logging.FileHandler(LOG_DIR / "proxy.log", encoding="utf-8")
|
| 257 |
info_file_handler.setLevel(logging.INFO)
|
| 258 |
+
info_file_handler.setFormatter(
|
| 259 |
+
logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
| 260 |
+
)
|
| 261 |
|
| 262 |
# Configure a dedicated file handler for all DEBUG-level logs
|
| 263 |
debug_file_handler = logging.FileHandler(LOG_DIR / "proxy_debug.log", encoding="utf-8")
|
| 264 |
debug_file_handler.setLevel(logging.DEBUG)
|
| 265 |
+
debug_file_handler.setFormatter(
|
| 266 |
+
logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
| 267 |
+
)
|
| 268 |
+
|
| 269 |
|
| 270 |
# Create a filter to ensure the debug handler ONLY gets DEBUG messages from the rotator_library
|
| 271 |
class RotatorDebugFilter(logging.Filter):
|
| 272 |
def filter(self, record):
|
| 273 |
+
return record.levelno == logging.DEBUG and record.name.startswith(
|
| 274 |
+
"rotator_library"
|
| 275 |
+
)
|
| 276 |
+
|
| 277 |
+
|
| 278 |
debug_file_handler.addFilter(RotatorDebugFilter())
|
| 279 |
|
| 280 |
# Configure a console handler with color
|
| 281 |
console_handler = colorlog.StreamHandler(sys.stdout)
|
| 282 |
console_handler.setLevel(logging.INFO)
|
| 283 |
formatter = colorlog.ColoredFormatter(
|
| 284 |
+
"%(log_color)s%(message)s",
|
| 285 |
log_colors={
|
| 286 |
+
"DEBUG": "cyan",
|
| 287 |
+
"INFO": "green",
|
| 288 |
+
"WARNING": "yellow",
|
| 289 |
+
"ERROR": "red",
|
| 290 |
+
"CRITICAL": "red,bg_white",
|
| 291 |
+
},
|
| 292 |
)
|
| 293 |
console_handler.setFormatter(formatter)
|
| 294 |
|
| 295 |
+
|
| 296 |
# Add a filter to prevent any LiteLLM logs from cluttering the console
|
| 297 |
class NoLiteLLMLogFilter(logging.Filter):
|
| 298 |
def filter(self, record):
|
| 299 |
+
return not record.name.startswith("LiteLLM")
|
| 300 |
+
|
| 301 |
+
|
| 302 |
console_handler.addFilter(NoLiteLLMLogFilter())
|
| 303 |
|
| 304 |
# Get the root logger and set it to DEBUG to capture all messages
|
|
|
|
| 348 |
for key, value in os.environ.items():
|
| 349 |
if key.startswith("IGNORE_MODELS_"):
|
| 350 |
provider = key.replace("IGNORE_MODELS_", "").lower()
|
| 351 |
+
models_to_ignore = [
|
| 352 |
+
model.strip() for model in value.split(",") if model.strip()
|
| 353 |
+
]
|
| 354 |
ignore_models[provider] = models_to_ignore
|
| 355 |
+
logging.debug(
|
| 356 |
+
f"Loaded ignore list for provider '{provider}': {models_to_ignore}"
|
| 357 |
+
)
|
| 358 |
|
| 359 |
# Load model whitelist from environment variables
|
| 360 |
whitelist_models = {}
|
| 361 |
for key, value in os.environ.items():
|
| 362 |
if key.startswith("WHITELIST_MODELS_"):
|
| 363 |
provider = key.replace("WHITELIST_MODELS_", "").lower()
|
| 364 |
+
models_to_whitelist = [
|
| 365 |
+
model.strip() for model in value.split(",") if model.strip()
|
| 366 |
+
]
|
| 367 |
whitelist_models[provider] = models_to_whitelist
|
| 368 |
+
logging.debug(
|
| 369 |
+
f"Loaded whitelist for provider '{provider}': {models_to_whitelist}"
|
| 370 |
+
)
|
| 371 |
|
| 372 |
# Load max concurrent requests per key from environment variables
|
| 373 |
max_concurrent_requests_per_key = {}
|
|
|
|
| 377 |
try:
|
| 378 |
max_concurrent = int(value)
|
| 379 |
if max_concurrent < 1:
|
| 380 |
+
logging.warning(
|
| 381 |
+
f"Invalid max_concurrent value for provider '{provider}': {value}. Must be >= 1. Using default (1)."
|
| 382 |
+
)
|
| 383 |
max_concurrent = 1
|
| 384 |
max_concurrent_requests_per_key[provider] = max_concurrent
|
| 385 |
+
logging.debug(
|
| 386 |
+
f"Loaded max concurrent requests for provider '{provider}': {max_concurrent}"
|
| 387 |
+
)
|
| 388 |
except ValueError:
|
| 389 |
+
logging.warning(
|
| 390 |
+
f"Invalid max_concurrent value for provider '{provider}': {value}. Using default (1)."
|
| 391 |
+
)
|
| 392 |
+
|
| 393 |
|
| 394 |
# --- Lifespan Management ---
|
| 395 |
@asynccontextmanager
|
|
|
|
| 406 |
if not skip_oauth_init and oauth_credentials:
|
| 407 |
logging.info("Starting OAuth credential validation and deduplication...")
|
| 408 |
processed_emails = {} # email -> {provider: path}
|
| 409 |
+
credentials_to_initialize = {} # provider -> [paths]
|
| 410 |
final_oauth_credentials = {}
|
| 411 |
|
| 412 |
# --- Pass 1: Pre-initialization Scan & Deduplication ---
|
| 413 |
+
# logging.info("Pass 1: Scanning for existing metadata to find duplicates...")
|
| 414 |
for provider, paths in oauth_credentials.items():
|
| 415 |
if provider not in credentials_to_initialize:
|
| 416 |
credentials_to_initialize[provider] = []
|
|
|
|
| 419 |
if path.startswith("env://"):
|
| 420 |
credentials_to_initialize[provider].append(path)
|
| 421 |
continue
|
| 422 |
+
|
| 423 |
try:
|
| 424 |
+
with open(path, "r") as f:
|
| 425 |
data = json.load(f)
|
| 426 |
metadata = data.get("_proxy_metadata", {})
|
| 427 |
email = metadata.get("email")
|
|
|
|
| 429 |
if email:
|
| 430 |
if email not in processed_emails:
|
| 431 |
processed_emails[email] = {}
|
| 432 |
+
|
| 433 |
if provider in processed_emails[email]:
|
| 434 |
original_path = processed_emails[email][provider]
|
| 435 |
+
logging.warning(
|
| 436 |
+
f"Duplicate for '{email}' on '{provider}' found in pre-scan: '{Path(path).name}'. Original: '{Path(original_path).name}'. Skipping."
|
| 437 |
+
)
|
| 438 |
continue
|
| 439 |
else:
|
| 440 |
processed_emails[email][provider] = path
|
| 441 |
+
|
| 442 |
credentials_to_initialize[provider].append(path)
|
| 443 |
|
| 444 |
except (FileNotFoundError, json.JSONDecodeError) as e:
|
| 445 |
+
logging.warning(
|
| 446 |
+
f"Could not pre-read metadata from '{path}': {e}. Will process during initialization."
|
| 447 |
+
)
|
| 448 |
credentials_to_initialize[provider].append(path)
|
| 449 |
+
|
| 450 |
# --- Pass 2: Parallel Initialization of Filtered Credentials ---
|
| 451 |
+
# logging.info("Pass 2: Initializing unique credentials and performing final check...")
|
| 452 |
async def process_credential(provider: str, path: str, provider_instance):
|
| 453 |
"""Process a single credential: initialize and fetch user info."""
|
| 454 |
try:
|
| 455 |
await provider_instance.initialize_token(path)
|
| 456 |
|
| 457 |
+
if not hasattr(provider_instance, "get_user_info"):
|
| 458 |
return (provider, path, None, None)
|
| 459 |
|
| 460 |
user_info = await provider_instance.get_user_info(path)
|
|
|
|
| 462 |
return (provider, path, email, None)
|
| 463 |
|
| 464 |
except Exception as e:
|
| 465 |
+
logging.error(
|
| 466 |
+
f"Failed to process OAuth token for {provider} at '{path}': {e}"
|
| 467 |
+
)
|
| 468 |
return (provider, path, None, e)
|
| 469 |
|
| 470 |
# Collect all tasks for parallel execution
|
|
|
|
| 476 |
provider_plugin_class = PROVIDER_PLUGINS.get(provider)
|
| 477 |
if not provider_plugin_class:
|
| 478 |
continue
|
| 479 |
+
|
| 480 |
provider_instance = provider_plugin_class()
|
| 481 |
+
|
| 482 |
for path in paths:
|
| 483 |
tasks.append(process_credential(provider, path, provider_instance))
|
| 484 |
|
|
|
|
| 493 |
continue
|
| 494 |
|
| 495 |
provider, path, email, error = result
|
| 496 |
+
|
| 497 |
# Skip if there was an error
|
| 498 |
if error:
|
| 499 |
continue
|
|
|
|
| 507 |
|
| 508 |
# Handle empty email
|
| 509 |
if not email:
|
| 510 |
+
logging.warning(
|
| 511 |
+
f"Could not retrieve email for '{path}'. Treating as unique."
|
| 512 |
+
)
|
| 513 |
if provider not in final_oauth_credentials:
|
| 514 |
final_oauth_credentials[provider] = []
|
| 515 |
final_oauth_credentials[provider].append(path)
|
|
|
|
| 518 |
# Deduplication check
|
| 519 |
if email not in processed_emails:
|
| 520 |
processed_emails[email] = {}
|
| 521 |
+
|
| 522 |
+
if (
|
| 523 |
+
provider in processed_emails[email]
|
| 524 |
+
and processed_emails[email][provider] != path
|
| 525 |
+
):
|
| 526 |
original_path = processed_emails[email][provider]
|
| 527 |
+
logging.warning(
|
| 528 |
+
f"Duplicate for '{email}' on '{provider}' found post-init: '{Path(path).name}'. Original: '{Path(original_path).name}'. Skipping."
|
| 529 |
+
)
|
| 530 |
continue
|
| 531 |
else:
|
| 532 |
processed_emails[email][provider] = path
|
|
|
|
| 537 |
# Update metadata (skip for env-based credentials - they don't have files)
|
| 538 |
if not path.startswith("env://"):
|
| 539 |
try:
|
| 540 |
+
with open(path, "r+") as f:
|
| 541 |
data = json.load(f)
|
| 542 |
metadata = data.get("_proxy_metadata", {})
|
| 543 |
metadata["email"] = email
|
|
|
|
| 560 |
# The client now uses the root logger configuration
|
| 561 |
client = RotatingClient(
|
| 562 |
api_keys=api_keys,
|
| 563 |
+
oauth_credentials=oauth_credentials, # Pass OAuth config
|
| 564 |
configure_logging=True,
|
| 565 |
litellm_provider_params=litellm_provider_params,
|
| 566 |
ignore_models=ignore_models,
|
| 567 |
whitelist_models=whitelist_models,
|
| 568 |
enable_request_logging=ENABLE_REQUEST_LOGGING,
|
| 569 |
+
max_concurrent_requests_per_key=max_concurrent_requests_per_key,
|
| 570 |
)
|
| 571 |
+
|
| 572 |
# Log loaded credentials summary (compact, always visible for deployment verification)
|
| 573 |
#_api_summary = ', '.join([f"{p}:{len(c)}" for p, c in api_keys.items()]) if api_keys else "none"
|
| 574 |
#_oauth_summary = ', '.join([f"{p}:{len(c)}" for p, c in oauth_credentials.items()]) if oauth_credentials else "none"
|
|
|
|
| 576 |
#print(f"🔑 Credentials loaded: {_total_summary} (API: {_api_summary} | OAuth: {_oauth_summary})")
|
| 577 |
client.background_refresher.start() # Start the background task
|
| 578 |
app.state.rotating_client = client
|
| 579 |
+
|
| 580 |
# Warn if no provider credentials are configured
|
| 581 |
if not client.all_credentials:
|
| 582 |
logging.warning("=" * 70)
|
| 583 |
logging.warning("⚠️ NO PROVIDER CREDENTIALS CONFIGURED")
|
| 584 |
logging.warning("The proxy is running but cannot serve any LLM requests.")
|
| 585 |
+
logging.warning(
|
| 586 |
+
"Launch the credential tool to add API keys or OAuth credentials."
|
| 587 |
+
)
|
| 588 |
logging.warning(" • Executable: Run with --add-credential flag")
|
| 589 |
logging.warning(" • Source: python src/proxy_app/main.py --add-credential")
|
| 590 |
logging.warning("=" * 70)
|
| 591 |
+
|
| 592 |
os.environ["LITELLM_LOG"] = "ERROR"
|
| 593 |
litellm.set_verbose = False
|
| 594 |
litellm.drop_params = True
|
|
|
|
| 599 |
else:
|
| 600 |
app.state.embedding_batcher = None
|
| 601 |
logging.info("RotatingClient initialized (EmbeddingBatcher disabled).")
|
| 602 |
+
|
| 603 |
# Start model info service in background (fetches pricing/capabilities data)
|
| 604 |
# This runs asynchronously and doesn't block proxy startup
|
| 605 |
model_info_service = await init_model_info_service()
|
| 606 |
app.state.model_info_service = model_info_service
|
| 607 |
logging.info("Model info service started (fetching pricing data in background).")
|
| 608 |
+
|
| 609 |
yield
|
| 610 |
+
|
| 611 |
+
await client.background_refresher.stop() # Stop the background task on shutdown
|
| 612 |
if app.state.embedding_batcher:
|
| 613 |
await app.state.embedding_batcher.stop()
|
| 614 |
await client.close()
|
| 615 |
+
|
| 616 |
# Stop model info service
|
| 617 |
+
if hasattr(app.state, "model_info_service") and app.state.model_info_service:
|
| 618 |
await app.state.model_info_service.stop()
|
| 619 |
+
|
| 620 |
if app.state.embedding_batcher:
|
| 621 |
logging.info("RotatingClient and EmbeddingBatcher closed.")
|
| 622 |
else:
|
| 623 |
logging.info("RotatingClient closed.")
|
| 624 |
|
| 625 |
+
|
| 626 |
# --- FastAPI App Setup ---
|
| 627 |
app = FastAPI(lifespan=lifespan)
|
| 628 |
|
|
|
|
| 636 |
)
|
| 637 |
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
|
| 638 |
|
| 639 |
+
|
| 640 |
def get_rotating_client(request: Request) -> RotatingClient:
|
| 641 |
"""Dependency to get the rotating client instance from the app state."""
|
| 642 |
return request.app.state.rotating_client
|
| 643 |
|
| 644 |
+
|
| 645 |
def get_embedding_batcher(request: Request) -> EmbeddingBatcher:
|
| 646 |
"""Dependency to get the embedding batcher instance from the app state."""
|
| 647 |
return request.app.state.embedding_batcher
|
| 648 |
|
| 649 |
+
|
| 650 |
async def verify_api_key(auth: str = Depends(api_key_header)):
|
| 651 |
"""Dependency to verify the proxy API key."""
|
| 652 |
+
# If PROXY_API_KEY is not set or empty, skip verification (open access)
|
| 653 |
+
if not PROXY_API_KEY:
|
| 654 |
+
return auth
|
| 655 |
if not auth or auth != f"Bearer {PROXY_API_KEY}":
|
| 656 |
raise HTTPException(status_code=401, detail="Invalid or missing API Key")
|
| 657 |
return auth
|
| 658 |
|
| 659 |
+
|
| 660 |
async def streaming_response_wrapper(
|
| 661 |
request: Request,
|
| 662 |
request_data: dict,
|
| 663 |
response_stream: AsyncGenerator[str, None],
|
| 664 |
+
logger: Optional[DetailedLogger] = None,
|
| 665 |
) -> AsyncGenerator[str, None]:
|
| 666 |
"""
|
| 667 |
Wraps a streaming response to log the full response after completion
|
|
|
|
| 669 |
"""
|
| 670 |
response_chunks = []
|
| 671 |
full_response = {}
|
| 672 |
+
|
| 673 |
try:
|
| 674 |
async for chunk_str in response_stream:
|
| 675 |
if await request.is_disconnected():
|
|
|
|
| 677 |
break
|
| 678 |
yield chunk_str
|
| 679 |
if chunk_str.strip() and chunk_str.startswith("data:"):
|
| 680 |
+
content = chunk_str[len("data:") :].strip()
|
| 681 |
if content != "[DONE]":
|
| 682 |
try:
|
| 683 |
chunk_data = json.loads(content)
|
|
|
|
| 693 |
"error": {
|
| 694 |
"message": f"An unexpected error occurred during the stream: {str(e)}",
|
| 695 |
"type": "proxy_internal_error",
|
| 696 |
+
"code": 500,
|
| 697 |
}
|
| 698 |
}
|
| 699 |
yield f"data: {json.dumps(error_payload)}\n\n"
|
| 700 |
yield "data: [DONE]\n\n"
|
| 701 |
# Also log this as a failed request
|
| 702 |
if logger:
|
| 703 |
+
logger.log_final_response(
|
| 704 |
+
status_code=500, headers=None, body={"error": str(e)}
|
| 705 |
+
)
|
| 706 |
+
return # Stop further processing
|
| 707 |
finally:
|
| 708 |
if response_chunks:
|
| 709 |
# --- Aggregation Logic ---
|
|
|
|
| 727 |
final_message["content"] = ""
|
| 728 |
if value:
|
| 729 |
final_message["content"] += value
|
| 730 |
+
|
| 731 |
elif key == "tool_calls":
|
| 732 |
for tc_chunk in value:
|
| 733 |
index = tc_chunk["index"]
|
| 734 |
if index not in aggregated_tool_calls:
|
| 735 |
+
aggregated_tool_calls[index] = {
|
| 736 |
+
"type": "function",
|
| 737 |
+
"function": {"name": "", "arguments": ""},
|
| 738 |
+
}
|
| 739 |
# Ensure 'function' key exists for this index before accessing its sub-keys
|
| 740 |
if "function" not in aggregated_tool_calls[index]:
|
| 741 |
+
aggregated_tool_calls[index]["function"] = {
|
| 742 |
+
"name": "",
|
| 743 |
+
"arguments": "",
|
| 744 |
+
}
|
| 745 |
if tc_chunk.get("id"):
|
| 746 |
aggregated_tool_calls[index]["id"] = tc_chunk["id"]
|
| 747 |
if "function" in tc_chunk:
|
| 748 |
if "name" in tc_chunk["function"]:
|
| 749 |
if tc_chunk["function"]["name"] is not None:
|
| 750 |
+
aggregated_tool_calls[index]["function"][
|
| 751 |
+
"name"
|
| 752 |
+
] += tc_chunk["function"]["name"]
|
| 753 |
if "arguments" in tc_chunk["function"]:
|
| 754 |
+
if (
|
| 755 |
+
tc_chunk["function"]["arguments"]
|
| 756 |
+
is not None
|
| 757 |
+
):
|
| 758 |
+
aggregated_tool_calls[index]["function"][
|
| 759 |
+
"arguments"
|
| 760 |
+
] += tc_chunk["function"]["arguments"]
|
| 761 |
+
|
| 762 |
elif key == "function_call":
|
| 763 |
if "function_call" not in final_message:
|
| 764 |
+
final_message["function_call"] = {
|
| 765 |
+
"name": "",
|
| 766 |
+
"arguments": "",
|
| 767 |
+
}
|
| 768 |
if "name" in value:
|
| 769 |
if value["name"] is not None:
|
| 770 |
+
final_message["function_call"]["name"] += value[
|
| 771 |
+
"name"
|
| 772 |
+
]
|
| 773 |
if "arguments" in value:
|
| 774 |
if value["arguments"] is not None:
|
| 775 |
+
final_message["function_call"]["arguments"] += (
|
| 776 |
+
value["arguments"]
|
| 777 |
+
)
|
| 778 |
+
|
| 779 |
+
else: # Generic key handling for other data like 'reasoning'
|
| 780 |
# FIX: Role should always replace, never concatenate
|
| 781 |
if key == "role":
|
| 782 |
final_message[key] = value
|
|
|
|
| 809 |
final_choice = {
|
| 810 |
"index": 0,
|
| 811 |
"message": final_message,
|
| 812 |
+
"finish_reason": finish_reason,
|
| 813 |
}
|
| 814 |
|
| 815 |
full_response = {
|
|
|
|
| 818 |
"created": first_chunk.get("created"),
|
| 819 |
"model": first_chunk.get("model"),
|
| 820 |
"choices": [final_choice],
|
| 821 |
+
"usage": usage_data,
|
| 822 |
}
|
| 823 |
|
| 824 |
if logger:
|
| 825 |
logger.log_final_response(
|
| 826 |
status_code=200,
|
| 827 |
headers=None, # Headers are not available at this stage
|
| 828 |
+
body=full_response,
|
| 829 |
)
|
| 830 |
|
| 831 |
+
|
| 832 |
@app.post("/v1/chat/completions")
|
| 833 |
async def chat_completions(
|
| 834 |
request: Request,
|
| 835 |
client: RotatingClient = Depends(get_rotating_client),
|
| 836 |
+
_=Depends(verify_api_key),
|
| 837 |
):
|
| 838 |
"""
|
| 839 |
OpenAI-compatible endpoint powered by the RotatingClient.
|
|
|
|
| 852 |
# instead of actual schemas, which can cause tool hallucination
|
| 853 |
# Modes: "remove" = delete temperature key, "set" = change to 1.0, "false" = disabled
|
| 854 |
override_temp_zero = os.getenv("OVERRIDE_TEMPERATURE_ZERO", "false").lower()
|
| 855 |
+
|
| 856 |
+
if (
|
| 857 |
+
override_temp_zero in ("remove", "set", "true", "1", "yes")
|
| 858 |
+
and "temperature" in request_data
|
| 859 |
+
and request_data["temperature"] == 0
|
| 860 |
+
):
|
| 861 |
if override_temp_zero == "remove":
|
| 862 |
# Remove temperature key entirely
|
| 863 |
del request_data["temperature"]
|
| 864 |
+
logging.debug(
|
| 865 |
+
"OVERRIDE_TEMPERATURE_ZERO=remove: Removed temperature=0 from request"
|
| 866 |
+
)
|
| 867 |
else:
|
| 868 |
# Set to 1.0 (for "set", "true", "1", "yes")
|
| 869 |
request_data["temperature"] = 1.0
|
| 870 |
+
logging.debug(
|
| 871 |
+
"OVERRIDE_TEMPERATURE_ZERO=set: Converting temperature=0 to temperature=1.0"
|
| 872 |
+
)
|
| 873 |
|
| 874 |
# If logging is enabled, perform all logging operations using the parsed data.
|
| 875 |
if logger:
|
|
|
|
| 877 |
|
| 878 |
# Extract and log specific reasoning parameters for monitoring.
|
| 879 |
model = request_data.get("model")
|
| 880 |
+
generation_cfg = (
|
| 881 |
+
request_data.get("generationConfig", {})
|
| 882 |
+
or request_data.get("generation_config", {})
|
| 883 |
+
or {}
|
| 884 |
+
)
|
| 885 |
+
reasoning_effort = request_data.get("reasoning_effort") or generation_cfg.get(
|
| 886 |
+
"reasoning_effort"
|
| 887 |
+
)
|
| 888 |
+
custom_reasoning_budget = request_data.get(
|
| 889 |
+
"custom_reasoning_budget"
|
| 890 |
+
) or generation_cfg.get("custom_reasoning_budget", False)
|
| 891 |
|
| 892 |
logging.getLogger("rotator_library").debug(
|
| 893 |
f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
|
|
|
|
| 898 |
url=str(request.url),
|
| 899 |
headers=dict(request.headers),
|
| 900 |
client_info=(request.client.host, request.client.port),
|
| 901 |
+
request_data=request_data,
|
| 902 |
)
|
| 903 |
is_streaming = request_data.get("stream", False)
|
| 904 |
|
| 905 |
if is_streaming:
|
| 906 |
response_generator = client.acompletion(request=request, **request_data)
|
| 907 |
return StreamingResponse(
|
| 908 |
+
streaming_response_wrapper(
|
| 909 |
+
request, request_data, response_generator, logger
|
| 910 |
+
),
|
| 911 |
+
media_type="text/event-stream",
|
| 912 |
)
|
| 913 |
else:
|
| 914 |
response = await client.acompletion(request=request, **request_data)
|
| 915 |
if logger:
|
| 916 |
# Assuming response has status_code and headers attributes
|
| 917 |
# This might need adjustment based on the actual response object
|
| 918 |
+
response_headers = (
|
| 919 |
+
response.headers if hasattr(response, "headers") else None
|
| 920 |
+
)
|
| 921 |
+
status_code = (
|
| 922 |
+
response.status_code if hasattr(response, "status_code") else 200
|
| 923 |
+
)
|
| 924 |
logger.log_final_response(
|
| 925 |
status_code=status_code,
|
| 926 |
headers=response_headers,
|
| 927 |
+
body=response.model_dump(),
|
| 928 |
)
|
| 929 |
return response
|
| 930 |
|
| 931 |
+
except (
|
| 932 |
+
litellm.InvalidRequestError,
|
| 933 |
+
ValueError,
|
| 934 |
+
litellm.ContextWindowExceededError,
|
| 935 |
+
) as e:
|
| 936 |
raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
|
| 937 |
except litellm.AuthenticationError as e:
|
| 938 |
raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
|
|
|
|
| 953 |
except json.JSONDecodeError:
|
| 954 |
request_data = {"error": "Could not parse request body"}
|
| 955 |
if logger:
|
| 956 |
+
logger.log_final_response(
|
| 957 |
+
status_code=500, headers=None, body={"error": str(e)}
|
| 958 |
+
)
|
| 959 |
raise HTTPException(status_code=500, detail=str(e))
|
| 960 |
|
| 961 |
+
|
| 962 |
@app.post("/v1/embeddings")
|
| 963 |
async def embeddings(
|
| 964 |
request: Request,
|
| 965 |
body: EmbeddingRequest,
|
| 966 |
client: RotatingClient = Depends(get_rotating_client),
|
| 967 |
batcher: Optional[EmbeddingBatcher] = Depends(get_embedding_batcher),
|
| 968 |
+
_=Depends(verify_api_key),
|
| 969 |
):
|
| 970 |
"""
|
| 971 |
OpenAI-compatible endpoint for creating embeddings.
|
|
|
|
| 979 |
url=str(request.url),
|
| 980 |
headers=dict(request.headers),
|
| 981 |
client_info=(request.client.host, request.client.port),
|
| 982 |
+
request_data=request_data,
|
| 983 |
)
|
| 984 |
if USE_EMBEDDING_BATCHER and batcher:
|
| 985 |
# --- Server-Side Batching Logic ---
|
|
|
|
| 993 |
individual_request = request_data.copy()
|
| 994 |
individual_request["input"] = single_input
|
| 995 |
tasks.append(batcher.add_request(individual_request))
|
| 996 |
+
|
| 997 |
results = await asyncio.gather(*tasks)
|
| 998 |
|
| 999 |
all_data = []
|
|
|
|
| 1009 |
"object": "list",
|
| 1010 |
"model": results[0]["model"],
|
| 1011 |
"data": all_data,
|
| 1012 |
+
"usage": {
|
| 1013 |
+
"prompt_tokens": total_prompt_tokens,
|
| 1014 |
+
"total_tokens": total_tokens,
|
| 1015 |
+
},
|
| 1016 |
}
|
| 1017 |
response = litellm.EmbeddingResponse(**final_response_data)
|
| 1018 |
+
|
| 1019 |
else:
|
| 1020 |
# --- Direct Pass-Through Logic ---
|
| 1021 |
request_data = body.model_dump(exclude_none=True)
|
| 1022 |
if isinstance(request_data.get("input"), str):
|
| 1023 |
request_data["input"] = [request_data["input"]]
|
| 1024 |
+
|
| 1025 |
response = await client.aembedding(request=request, **request_data)
|
| 1026 |
|
| 1027 |
return response
|
|
|
|
| 1029 |
except HTTPException as e:
|
| 1030 |
# Re-raise HTTPException to ensure it's not caught by the generic Exception handler
|
| 1031 |
raise e
|
| 1032 |
+
except (
|
| 1033 |
+
litellm.InvalidRequestError,
|
| 1034 |
+
ValueError,
|
| 1035 |
+
litellm.ContextWindowExceededError,
|
| 1036 |
+
) as e:
|
| 1037 |
raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
|
| 1038 |
except litellm.AuthenticationError as e:
|
| 1039 |
raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
|
|
|
|
| 1049 |
logging.error(f"Embedding request failed: {e}")
|
| 1050 |
raise HTTPException(status_code=500, detail=str(e))
|
| 1051 |
|
| 1052 |
+
|
| 1053 |
@app.get("/")
|
| 1054 |
def read_root():
|
| 1055 |
return {"Status": "API Key Proxy is running"}
|
| 1056 |
|
| 1057 |
+
|
| 1058 |
@app.get("/v1/models")
|
| 1059 |
async def list_models(
|
| 1060 |
request: Request,
|
|
|
|
| 1064 |
):
|
| 1065 |
"""
|
| 1066 |
Returns a list of available models in the OpenAI-compatible format.
|
| 1067 |
+
|
| 1068 |
Query Parameters:
|
| 1069 |
enriched: If True (default), returns detailed model info with pricing and capabilities.
|
| 1070 |
If False, returns minimal OpenAI-compatible response.
|
| 1071 |
"""
|
| 1072 |
model_ids = await client.get_all_available_models(grouped=False)
|
| 1073 |
+
|
| 1074 |
+
if enriched and hasattr(request.app.state, "model_info_service"):
|
| 1075 |
model_info_service = request.app.state.model_info_service
|
| 1076 |
if model_info_service.is_ready:
|
| 1077 |
# Return enriched model data
|
| 1078 |
enriched_data = model_info_service.enrich_model_list(model_ids)
|
| 1079 |
return {"object": "list", "data": enriched_data}
|
| 1080 |
+
|
| 1081 |
# Fallback to basic model cards
|
| 1082 |
+
model_cards = [
|
| 1083 |
+
{
|
| 1084 |
+
"id": model_id,
|
| 1085 |
+
"object": "model",
|
| 1086 |
+
"created": int(time.time()),
|
| 1087 |
+
"owned_by": "Mirro-Proxy",
|
| 1088 |
+
}
|
| 1089 |
+
for model_id in model_ids
|
| 1090 |
+
]
|
| 1091 |
return {"object": "list", "data": model_cards}
|
| 1092 |
|
| 1093 |
|
|
|
|
| 1099 |
):
|
| 1100 |
"""
|
| 1101 |
Returns detailed information about a specific model.
|
| 1102 |
+
|
| 1103 |
Path Parameters:
|
| 1104 |
model_id: The model ID (e.g., "anthropic/claude-3-opus", "openrouter/openai/gpt-4")
|
| 1105 |
"""
|
| 1106 |
+
if hasattr(request.app.state, "model_info_service"):
|
| 1107 |
model_info_service = request.app.state.model_info_service
|
| 1108 |
if model_info_service.is_ready:
|
| 1109 |
info = model_info_service.get_model_info(model_id)
|
| 1110 |
if info:
|
| 1111 |
return info.to_dict()
|
| 1112 |
+
|
| 1113 |
# Return basic info if service not ready or model not found
|
| 1114 |
return {
|
| 1115 |
"id": model_id,
|
|
|
|
| 1127 |
"""
|
| 1128 |
Returns statistics about the model info service (for monitoring/debugging).
|
| 1129 |
"""
|
| 1130 |
+
if hasattr(request.app.state, "model_info_service"):
|
| 1131 |
return request.app.state.model_info_service.get_stats()
|
| 1132 |
return {"error": "Model info service not initialized"}
|
| 1133 |
|
|
|
|
| 1139 |
"""
|
| 1140 |
return list(PROVIDER_PLUGINS.keys())
|
| 1141 |
|
| 1142 |
+
|
| 1143 |
@app.post("/v1/token-count")
|
| 1144 |
async def token_count(
|
| 1145 |
+
request: Request,
|
| 1146 |
client: RotatingClient = Depends(get_rotating_client),
|
| 1147 |
+
_=Depends(verify_api_key),
|
| 1148 |
):
|
| 1149 |
"""
|
| 1150 |
Calculates the token count for a given list of messages and a model.
|
|
|
|
| 1155 |
messages = data.get("messages")
|
| 1156 |
|
| 1157 |
if not model or not messages:
|
| 1158 |
+
raise HTTPException(
|
| 1159 |
+
status_code=400, detail="'model' and 'messages' are required."
|
| 1160 |
+
)
|
| 1161 |
|
| 1162 |
count = client.token_count(**data)
|
| 1163 |
return {"token_count": count}
|
|
|
|
| 1168 |
|
| 1169 |
|
| 1170 |
@app.post("/v1/cost-estimate")
|
| 1171 |
+
async def cost_estimate(request: Request, _=Depends(verify_api_key)):
|
|
|
|
|
|
|
|
|
|
| 1172 |
"""
|
| 1173 |
Estimates the cost for a request based on token counts and model pricing.
|
| 1174 |
+
|
| 1175 |
Request body:
|
| 1176 |
{
|
| 1177 |
"model": "anthropic/claude-3-opus",
|
|
|
|
| 1180 |
"cache_read_tokens": 0, # optional
|
| 1181 |
"cache_creation_tokens": 0 # optional
|
| 1182 |
}
|
| 1183 |
+
|
| 1184 |
Returns:
|
| 1185 |
{
|
| 1186 |
"model": "anthropic/claude-3-opus",
|
|
|
|
| 1200 |
completion_tokens = data.get("completion_tokens", 0)
|
| 1201 |
cache_read_tokens = data.get("cache_read_tokens", 0)
|
| 1202 |
cache_creation_tokens = data.get("cache_creation_tokens", 0)
|
| 1203 |
+
|
| 1204 |
if not model:
|
| 1205 |
raise HTTPException(status_code=400, detail="'model' is required.")
|
| 1206 |
+
|
| 1207 |
result = {
|
| 1208 |
"model": model,
|
| 1209 |
"cost": None,
|
| 1210 |
"currency": "USD",
|
| 1211 |
"pricing": {},
|
| 1212 |
+
"source": None,
|
| 1213 |
}
|
| 1214 |
+
|
| 1215 |
# Try model info service first
|
| 1216 |
+
if hasattr(request.app.state, "model_info_service"):
|
| 1217 |
model_info_service = request.app.state.model_info_service
|
| 1218 |
if model_info_service.is_ready:
|
| 1219 |
cost = model_info_service.calculate_cost(
|
| 1220 |
+
model,
|
| 1221 |
+
prompt_tokens,
|
| 1222 |
+
completion_tokens,
|
| 1223 |
+
cache_read_tokens,
|
| 1224 |
+
cache_creation_tokens,
|
| 1225 |
)
|
| 1226 |
if cost is not None:
|
| 1227 |
cost_info = model_info_service.get_cost_info(model)
|
|
|
|
| 1229 |
result["pricing"] = cost_info or {}
|
| 1230 |
result["source"] = "model_info_service"
|
| 1231 |
return result
|
| 1232 |
+
|
| 1233 |
# Fallback to litellm
|
| 1234 |
try:
|
| 1235 |
import litellm
|
| 1236 |
+
|
| 1237 |
# Create a mock response for cost calculation
|
| 1238 |
model_info = litellm.get_model_info(model)
|
| 1239 |
input_cost = model_info.get("input_cost_per_token", 0)
|
| 1240 |
output_cost = model_info.get("output_cost_per_token", 0)
|
| 1241 |
+
|
| 1242 |
if input_cost or output_cost:
|
| 1243 |
cost = (prompt_tokens * input_cost) + (completion_tokens * output_cost)
|
| 1244 |
result["cost"] = cost
|
| 1245 |
result["pricing"] = {
|
| 1246 |
"input_cost_per_token": input_cost,
|
| 1247 |
+
"output_cost_per_token": output_cost,
|
| 1248 |
}
|
| 1249 |
result["source"] = "litellm_fallback"
|
| 1250 |
return result
|
| 1251 |
except Exception:
|
| 1252 |
pass
|
| 1253 |
+
|
| 1254 |
result["source"] = "unknown"
|
| 1255 |
result["error"] = "Pricing data not available for this model"
|
| 1256 |
return result
|
| 1257 |
+
|
| 1258 |
except HTTPException:
|
| 1259 |
raise
|
| 1260 |
except Exception as e:
|
|
|
|
| 1265 |
if __name__ == "__main__":
|
| 1266 |
# Define ENV_FILE for onboarding checks
|
| 1267 |
ENV_FILE = Path.cwd() / ".env"
|
| 1268 |
+
|
| 1269 |
# Check if launcher TUI should be shown (no arguments provided)
|
| 1270 |
if len(sys.argv) == 1:
|
| 1271 |
# No arguments - show launcher TUI (lazy import)
|
| 1272 |
from proxy_app.launcher_tui import run_launcher_tui
|
| 1273 |
+
|
| 1274 |
run_launcher_tui()
|
| 1275 |
# Launcher modifies sys.argv and returns, or exits if user chose Exit
|
| 1276 |
# If we get here, user chose "Run Proxy" and sys.argv is modified
|
| 1277 |
# Re-parse arguments with modified sys.argv
|
| 1278 |
args = parser.parse_args()
|
| 1279 |
+
|
| 1280 |
def needs_onboarding() -> bool:
|
| 1281 |
"""
|
| 1282 |
Check if the proxy needs onboarding (first-time setup).
|
|
|
|
| 1286 |
# PROXY_API_KEY is optional (will show warning if not set)
|
| 1287 |
if not ENV_FILE.is_file():
|
| 1288 |
return True
|
| 1289 |
+
|
| 1290 |
return False
|
| 1291 |
|
| 1292 |
def show_onboarding_message():
|
| 1293 |
"""Display clear explanatory message for why onboarding is needed."""
|
| 1294 |
+
os.system(
|
| 1295 |
+
"cls" if os.name == "nt" else "clear"
|
| 1296 |
+
) # Clear terminal for clean presentation
|
| 1297 |
+
console.print(
|
| 1298 |
+
Panel.fit(
|
| 1299 |
+
"[bold cyan]🚀 LLM API Key Proxy - First Time Setup[/bold cyan]",
|
| 1300 |
+
border_style="cyan",
|
| 1301 |
+
)
|
| 1302 |
+
)
|
| 1303 |
console.print("[bold yellow]⚠️ Configuration Required[/bold yellow]\n")
|
| 1304 |
+
|
| 1305 |
console.print("The proxy needs initial configuration:")
|
| 1306 |
console.print(" [red]❌ No .env file found[/red]")
|
| 1307 |
+
|
| 1308 |
console.print("\n[bold]Why this matters:[/bold]")
|
| 1309 |
console.print(" • The .env file stores your credentials and settings")
|
| 1310 |
console.print(" • PROXY_API_KEY protects your proxy from unauthorized access")
|
| 1311 |
console.print(" • Provider API keys enable LLM access")
|
| 1312 |
+
|
| 1313 |
console.print("\n[bold]What happens next:[/bold]")
|
| 1314 |
console.print(" 1. We'll create a .env file with PROXY_API_KEY")
|
| 1315 |
console.print(" 2. You can add LLM provider credentials (API keys or OAuth)")
|
| 1316 |
console.print(" 3. The proxy will then start normally")
|
| 1317 |
+
|
| 1318 |
+
console.print(
|
| 1319 |
+
"\n[bold yellow]⚠️ Note:[/bold yellow] The credential tool adds PROXY_API_KEY by default."
|
| 1320 |
+
)
|
| 1321 |
console.print(" You can remove it later if you want an unsecured proxy.\n")
|
| 1322 |
+
|
| 1323 |
+
console.input(
|
| 1324 |
+
"[bold green]Press Enter to launch the credential setup tool...[/bold green]"
|
| 1325 |
+
)
|
| 1326 |
|
| 1327 |
# Check if user explicitly wants to add credentials
|
| 1328 |
if args.add_credential:
|
| 1329 |
# Import and call ensure_env_defaults to create .env and PROXY_API_KEY if needed
|
| 1330 |
from rotator_library.credential_tool import ensure_env_defaults
|
| 1331 |
+
|
| 1332 |
ensure_env_defaults()
|
| 1333 |
# Reload environment variables after ensure_env_defaults creates/updates .env
|
| 1334 |
load_dotenv(override=True)
|
|
|
|
| 1339 |
# Import console from rich for better messaging
|
| 1340 |
from rich.console import Console
|
| 1341 |
from rich.panel import Panel
|
| 1342 |
+
|
| 1343 |
console = Console()
|
| 1344 |
+
|
| 1345 |
# Show clear explanatory message
|
| 1346 |
show_onboarding_message()
|
| 1347 |
+
|
| 1348 |
# Launch credential tool automatically
|
| 1349 |
from rotator_library.credential_tool import ensure_env_defaults
|
| 1350 |
+
|
| 1351 |
ensure_env_defaults()
|
| 1352 |
load_dotenv(override=True)
|
| 1353 |
run_credential_tool()
|
| 1354 |
+
|
| 1355 |
# After credential tool exits, reload and re-check
|
| 1356 |
load_dotenv(override=True)
|
| 1357 |
# Re-read PROXY_API_KEY from environment
|
| 1358 |
PROXY_API_KEY = os.getenv("PROXY_API_KEY")
|
| 1359 |
+
|
| 1360 |
# Verify onboarding is complete
|
| 1361 |
if needs_onboarding():
|
| 1362 |
console.print("\n[bold red]❌ Configuration incomplete.[/bold red]")
|
| 1363 |
+
console.print(
|
| 1364 |
+
"The proxy still cannot start. Please ensure PROXY_API_KEY is set in .env\n"
|
| 1365 |
+
)
|
| 1366 |
sys.exit(1)
|
| 1367 |
else:
|
| 1368 |
console.print("\n[bold green]✅ Configuration complete![/bold green]")
|
| 1369 |
console.print("\nStarting proxy server...\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1370 |
|
| 1371 |
+
import uvicorn
|
| 1372 |
|
| 1373 |
+
uvicorn.run(app, host=args.host, port=args.port)
|
src/proxy_app/settings_tool.py
CHANGED
|
@@ -17,37 +17,38 @@ console = Console()
|
|
| 17 |
|
| 18 |
def clear_screen():
|
| 19 |
"""
|
| 20 |
-
Cross-platform terminal clear that works robustly on both
|
| 21 |
classic Windows conhost and modern terminals (Windows Terminal, Linux, Mac).
|
| 22 |
-
|
| 23 |
Uses native OS commands instead of ANSI escape sequences:
|
| 24 |
- Windows (conhost & Windows Terminal): cls
|
| 25 |
- Unix-like systems (Linux, Mac): clear
|
| 26 |
"""
|
| 27 |
-
os.system(
|
| 28 |
|
| 29 |
|
| 30 |
class AdvancedSettings:
|
| 31 |
"""Manages pending changes to .env"""
|
| 32 |
-
|
| 33 |
def __init__(self):
|
| 34 |
self.env_file = Path.cwd() / ".env"
|
| 35 |
self.pending_changes = {} # key -> value (None means delete)
|
| 36 |
self.load_current_settings()
|
| 37 |
-
|
| 38 |
def load_current_settings(self):
|
| 39 |
"""Load current .env values into env vars"""
|
| 40 |
from dotenv import load_dotenv
|
|
|
|
| 41 |
load_dotenv(override=True)
|
| 42 |
-
|
| 43 |
def set(self, key: str, value: str):
|
| 44 |
"""Stage a change"""
|
| 45 |
self.pending_changes[key] = value
|
| 46 |
-
|
| 47 |
def remove(self, key: str):
|
| 48 |
"""Stage a removal"""
|
| 49 |
self.pending_changes[key] = None
|
| 50 |
-
|
| 51 |
def save(self):
|
| 52 |
"""Write pending changes to .env"""
|
| 53 |
for key, value in self.pending_changes.items():
|
|
@@ -57,14 +58,14 @@ class AdvancedSettings:
|
|
| 57 |
else:
|
| 58 |
# Set key
|
| 59 |
set_key(str(self.env_file), key, value)
|
| 60 |
-
|
| 61 |
self.pending_changes.clear()
|
| 62 |
self.load_current_settings()
|
| 63 |
-
|
| 64 |
def discard(self):
|
| 65 |
"""Discard pending changes"""
|
| 66 |
self.pending_changes.clear()
|
| 67 |
-
|
| 68 |
def has_pending(self) -> bool:
|
| 69 |
"""Check if there are pending changes"""
|
| 70 |
return bool(self.pending_changes)
|
|
@@ -72,14 +73,14 @@ class AdvancedSettings:
|
|
| 72 |
|
| 73 |
class CustomProviderManager:
|
| 74 |
"""Manages custom provider API bases"""
|
| 75 |
-
|
| 76 |
def __init__(self, settings: AdvancedSettings):
|
| 77 |
self.settings = settings
|
| 78 |
-
|
| 79 |
def get_current_providers(self) -> Dict[str, str]:
|
| 80 |
"""Get currently configured custom providers"""
|
| 81 |
from proxy_app.provider_urls import PROVIDER_URL_MAP
|
| 82 |
-
|
| 83 |
providers = {}
|
| 84 |
for key, value in os.environ.items():
|
| 85 |
if key.endswith("_API_BASE"):
|
|
@@ -88,16 +89,16 @@ class CustomProviderManager:
|
|
| 88 |
if provider not in PROVIDER_URL_MAP:
|
| 89 |
providers[provider] = value
|
| 90 |
return providers
|
| 91 |
-
|
| 92 |
def add_provider(self, name: str, api_base: str):
|
| 93 |
"""Add PROVIDER_API_BASE"""
|
| 94 |
key = f"{name.upper()}_API_BASE"
|
| 95 |
self.settings.set(key, api_base)
|
| 96 |
-
|
| 97 |
def edit_provider(self, name: str, api_base: str):
|
| 98 |
"""Edit PROVIDER_API_BASE"""
|
| 99 |
self.add_provider(name, api_base)
|
| 100 |
-
|
| 101 |
def remove_provider(self, name: str):
|
| 102 |
"""Remove PROVIDER_API_BASE"""
|
| 103 |
key = f"{name.upper()}_API_BASE"
|
|
@@ -106,10 +107,10 @@ class CustomProviderManager:
|
|
| 106 |
|
| 107 |
class ModelDefinitionManager:
|
| 108 |
"""Manages PROVIDER_MODELS"""
|
| 109 |
-
|
| 110 |
def __init__(self, settings: AdvancedSettings):
|
| 111 |
self.settings = settings
|
| 112 |
-
|
| 113 |
def get_current_provider_models(self, provider: str) -> Optional[Dict]:
|
| 114 |
"""Get currently configured models for a provider"""
|
| 115 |
key = f"{provider.upper()}_MODELS"
|
|
@@ -120,7 +121,7 @@ class ModelDefinitionManager:
|
|
| 120 |
except (json.JSONDecodeError, ValueError):
|
| 121 |
return None
|
| 122 |
return None
|
| 123 |
-
|
| 124 |
def get_all_providers_with_models(self) -> Dict[str, int]:
|
| 125 |
"""Get all providers with model definitions"""
|
| 126 |
providers = {}
|
|
@@ -136,13 +137,13 @@ class ModelDefinitionManager:
|
|
| 136 |
except (json.JSONDecodeError, ValueError):
|
| 137 |
pass
|
| 138 |
return providers
|
| 139 |
-
|
| 140 |
def set_models(self, provider: str, models: Dict[str, Dict[str, Any]]):
|
| 141 |
"""Set PROVIDER_MODELS"""
|
| 142 |
key = f"{provider.upper()}_MODELS"
|
| 143 |
value = json.dumps(models)
|
| 144 |
self.settings.set(key, value)
|
| 145 |
-
|
| 146 |
def remove_models(self, provider: str):
|
| 147 |
"""Remove PROVIDER_MODELS"""
|
| 148 |
key = f"{provider.upper()}_MODELS"
|
|
@@ -151,10 +152,10 @@ class ModelDefinitionManager:
|
|
| 151 |
|
| 152 |
class ConcurrencyManager:
|
| 153 |
"""Manages MAX_CONCURRENT_REQUESTS_PER_KEY_PROVIDER"""
|
| 154 |
-
|
| 155 |
def __init__(self, settings: AdvancedSettings):
|
| 156 |
self.settings = settings
|
| 157 |
-
|
| 158 |
def get_current_limits(self) -> Dict[str, int]:
|
| 159 |
"""Get currently configured concurrency limits"""
|
| 160 |
limits = {}
|
|
@@ -166,18 +167,161 @@ class ConcurrencyManager:
|
|
| 166 |
except (json.JSONDecodeError, ValueError):
|
| 167 |
pass
|
| 168 |
return limits
|
| 169 |
-
|
| 170 |
def set_limit(self, provider: str, limit: int):
|
| 171 |
"""Set concurrency limit"""
|
| 172 |
key = f"MAX_CONCURRENT_REQUESTS_PER_KEY_{provider.upper()}"
|
| 173 |
self.settings.set(key, str(limit))
|
| 174 |
-
|
| 175 |
def remove_limit(self, provider: str):
|
| 176 |
"""Remove concurrency limit (reset to default)"""
|
| 177 |
key = f"MAX_CONCURRENT_REQUESTS_PER_KEY_{provider.upper()}"
|
| 178 |
self.settings.remove(key)
|
| 179 |
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
# =============================================================================
|
| 182 |
# PROVIDER-SPECIFIC SETTINGS DEFINITIONS
|
| 183 |
# =============================================================================
|
|
@@ -294,24 +438,26 @@ PROVIDER_SETTINGS_MAP = {
|
|
| 294 |
|
| 295 |
class ProviderSettingsManager:
|
| 296 |
"""Manages provider-specific configuration settings"""
|
| 297 |
-
|
| 298 |
def __init__(self, settings: AdvancedSettings):
|
| 299 |
self.settings = settings
|
| 300 |
-
|
| 301 |
def get_available_providers(self) -> List[str]:
|
| 302 |
"""Get list of providers with specific settings available"""
|
| 303 |
return list(PROVIDER_SETTINGS_MAP.keys())
|
| 304 |
-
|
| 305 |
-
def get_provider_settings_definitions(
|
|
|
|
|
|
|
| 306 |
"""Get settings definitions for a provider"""
|
| 307 |
return PROVIDER_SETTINGS_MAP.get(provider, {})
|
| 308 |
-
|
| 309 |
def get_current_value(self, key: str, definition: Dict[str, Any]) -> Any:
|
| 310 |
"""Get current value of a setting from environment"""
|
| 311 |
env_value = os.getenv(key)
|
| 312 |
if env_value is None:
|
| 313 |
return definition.get("default")
|
| 314 |
-
|
| 315 |
setting_type = definition.get("type", "str")
|
| 316 |
try:
|
| 317 |
if setting_type == "bool":
|
|
@@ -322,7 +468,7 @@ class ProviderSettingsManager:
|
|
| 322 |
return env_value
|
| 323 |
except (ValueError, AttributeError):
|
| 324 |
return definition.get("default")
|
| 325 |
-
|
| 326 |
def get_all_current_values(self, provider: str) -> Dict[str, Any]:
|
| 327 |
"""Get all current values for a provider"""
|
| 328 |
definitions = self.get_provider_settings_definitions(provider)
|
|
@@ -330,7 +476,7 @@ class ProviderSettingsManager:
|
|
| 330 |
for key, definition in definitions.items():
|
| 331 |
values[key] = self.get_current_value(key, definition)
|
| 332 |
return values
|
| 333 |
-
|
| 334 |
def set_value(self, key: str, value: Any, definition: Dict[str, Any]):
|
| 335 |
"""Set a setting value, converting to string for .env storage"""
|
| 336 |
setting_type = definition.get("type", "str")
|
|
@@ -339,11 +485,11 @@ class ProviderSettingsManager:
|
|
| 339 |
else:
|
| 340 |
str_value = str(value)
|
| 341 |
self.settings.set(key, str_value)
|
| 342 |
-
|
| 343 |
def reset_to_default(self, key: str):
|
| 344 |
"""Remove a setting to reset it to default"""
|
| 345 |
self.settings.remove(key)
|
| 346 |
-
|
| 347 |
def get_modified_settings(self, provider: str) -> Dict[str, Any]:
|
| 348 |
"""Get settings that differ from defaults"""
|
| 349 |
definitions = self.get_provider_settings_definitions(provider)
|
|
@@ -358,80 +504,100 @@ class ProviderSettingsManager:
|
|
| 358 |
|
| 359 |
class SettingsTool:
|
| 360 |
"""Main settings tool TUI"""
|
| 361 |
-
|
| 362 |
def __init__(self):
|
| 363 |
self.console = Console()
|
| 364 |
self.settings = AdvancedSettings()
|
| 365 |
self.provider_mgr = CustomProviderManager(self.settings)
|
| 366 |
self.model_mgr = ModelDefinitionManager(self.settings)
|
| 367 |
self.concurrency_mgr = ConcurrencyManager(self.settings)
|
|
|
|
|
|
|
| 368 |
self.provider_settings_mgr = ProviderSettingsManager(self.settings)
|
| 369 |
self.running = True
|
| 370 |
-
|
| 371 |
def get_available_providers(self) -> List[str]:
|
| 372 |
"""Get list of providers that have credentials configured"""
|
| 373 |
env_file = Path.cwd() / ".env"
|
| 374 |
providers = set()
|
| 375 |
-
|
| 376 |
# Scan for providers with API keys from local .env
|
| 377 |
if env_file.exists():
|
| 378 |
try:
|
| 379 |
-
with open(env_file,
|
| 380 |
for line in f:
|
| 381 |
line = line.strip()
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
provider = line.split("_API_KEY")[0].strip().lower()
|
| 384 |
providers.add(provider)
|
| 385 |
except (IOError, OSError):
|
| 386 |
pass
|
| 387 |
-
|
| 388 |
# Also check for OAuth providers from files
|
| 389 |
-
oauth_dir = Path("
|
| 390 |
if oauth_dir.exists():
|
| 391 |
for file in oauth_dir.glob("*_oauth_*.json"):
|
| 392 |
provider = file.name.split("_oauth_")[0]
|
| 393 |
providers.add(provider)
|
| 394 |
-
|
| 395 |
return sorted(list(providers))
|
| 396 |
|
| 397 |
def run(self):
|
| 398 |
"""Main loop"""
|
| 399 |
while self.running:
|
| 400 |
self.show_main_menu()
|
| 401 |
-
|
| 402 |
def show_main_menu(self):
|
| 403 |
"""Display settings categories"""
|
| 404 |
clear_screen()
|
| 405 |
-
|
| 406 |
-
self.console.print(
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
|
|
|
|
|
|
| 411 |
self.console.print()
|
| 412 |
self.console.print("[bold]⚙️ Configuration Categories[/bold]")
|
| 413 |
self.console.print()
|
| 414 |
self.console.print(" 1. 🌐 Custom Provider API Bases")
|
| 415 |
self.console.print(" 2. 📦 Provider Model Definitions")
|
| 416 |
self.console.print(" 3. ⚡ Concurrency Limits")
|
| 417 |
-
self.console.print(" 4.
|
| 418 |
-
self.console.print(" 5.
|
| 419 |
-
self.console.print(" 6.
|
| 420 |
-
|
|
|
|
| 421 |
self.console.print()
|
| 422 |
self.console.print("━" * 70)
|
| 423 |
-
|
| 424 |
if self.settings.has_pending():
|
| 425 |
-
self.console.print(
|
|
|
|
|
|
|
| 426 |
else:
|
| 427 |
self.console.print("[dim]ℹ️ No pending changes[/dim]")
|
| 428 |
-
|
| 429 |
self.console.print()
|
| 430 |
-
self.console.print(
|
|
|
|
|
|
|
| 431 |
self.console.print()
|
| 432 |
-
|
| 433 |
-
choice = Prompt.ask(
|
| 434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
if choice == "1":
|
| 436 |
self.manage_custom_providers()
|
| 437 |
elif choice == "2":
|
|
@@ -439,34 +605,38 @@ class SettingsTool:
|
|
| 439 |
elif choice == "3":
|
| 440 |
self.manage_concurrency_limits()
|
| 441 |
elif choice == "4":
|
| 442 |
-
self.
|
| 443 |
elif choice == "5":
|
| 444 |
-
self.
|
| 445 |
elif choice == "6":
|
|
|
|
|
|
|
| 446 |
self.exit_without_saving()
|
| 447 |
-
|
| 448 |
def manage_custom_providers(self):
|
| 449 |
"""Manage custom provider API bases"""
|
| 450 |
while True:
|
| 451 |
clear_screen()
|
| 452 |
-
|
| 453 |
providers = self.provider_mgr.get_current_providers()
|
| 454 |
-
|
| 455 |
-
self.console.print(
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
|
|
|
|
|
|
| 460 |
self.console.print()
|
| 461 |
self.console.print("[bold]📋 Configured Custom Providers[/bold]")
|
| 462 |
self.console.print("━" * 70)
|
| 463 |
-
|
| 464 |
if providers:
|
| 465 |
for name, base in providers.items():
|
| 466 |
self.console.print(f" • {name:15} {base}")
|
| 467 |
else:
|
| 468 |
self.console.print(" [dim]No custom providers configured[/dim]")
|
| 469 |
-
|
| 470 |
self.console.print()
|
| 471 |
self.console.print("━" * 70)
|
| 472 |
self.console.print()
|
|
@@ -476,94 +646,116 @@ class SettingsTool:
|
|
| 476 |
self.console.print(" 2. ✏️ Edit Existing Provider")
|
| 477 |
self.console.print(" 3. 🗑️ Remove Provider")
|
| 478 |
self.console.print(" 4. ↩️ Back to Settings Menu")
|
| 479 |
-
|
| 480 |
self.console.print()
|
| 481 |
self.console.print("━" * 70)
|
| 482 |
self.console.print()
|
| 483 |
-
|
| 484 |
-
choice = Prompt.ask(
|
| 485 |
-
|
|
|
|
|
|
|
| 486 |
if choice == "1":
|
| 487 |
name = Prompt.ask("Provider name (e.g., 'opencode')").strip().lower()
|
| 488 |
if name:
|
| 489 |
api_base = Prompt.ask("API Base URL").strip()
|
| 490 |
if api_base:
|
| 491 |
self.provider_mgr.add_provider(name, api_base)
|
| 492 |
-
self.console.print(
|
| 493 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
input("\nPress Enter to continue...")
|
| 495 |
-
|
| 496 |
elif choice == "2":
|
| 497 |
if not providers:
|
| 498 |
self.console.print("\n[yellow]No providers to edit[/yellow]")
|
| 499 |
input("\nPress Enter to continue...")
|
| 500 |
continue
|
| 501 |
-
|
| 502 |
# Show numbered list
|
| 503 |
self.console.print("\n[bold]Select provider to edit:[/bold]")
|
| 504 |
providers_list = list(providers.keys())
|
| 505 |
for idx, prov in enumerate(providers_list, 1):
|
| 506 |
self.console.print(f" {idx}. {prov}")
|
| 507 |
-
|
| 508 |
-
choice_idx = IntPrompt.ask(
|
|
|
|
|
|
|
|
|
|
| 509 |
name = providers_list[choice_idx - 1]
|
| 510 |
current_base = providers.get(name, "")
|
| 511 |
-
|
| 512 |
self.console.print(f"\nCurrent API Base: {current_base}")
|
| 513 |
-
new_base = Prompt.ask(
|
| 514 |
-
|
|
|
|
|
|
|
| 515 |
if new_base and new_base != current_base:
|
| 516 |
self.provider_mgr.edit_provider(name, new_base)
|
| 517 |
-
self.console.print(
|
|
|
|
|
|
|
| 518 |
else:
|
| 519 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 520 |
input("\nPress Enter to continue...")
|
| 521 |
-
|
| 522 |
elif choice == "3":
|
| 523 |
if not providers:
|
| 524 |
self.console.print("\n[yellow]No providers to remove[/yellow]")
|
| 525 |
input("\nPress Enter to continue...")
|
| 526 |
continue
|
| 527 |
-
|
| 528 |
# Show numbered list
|
| 529 |
self.console.print("\n[bold]Select provider to remove:[/bold]")
|
| 530 |
providers_list = list(providers.keys())
|
| 531 |
for idx, prov in enumerate(providers_list, 1):
|
| 532 |
self.console.print(f" {idx}. {prov}")
|
| 533 |
-
|
| 534 |
-
choice_idx = IntPrompt.ask(
|
|
|
|
|
|
|
|
|
|
| 535 |
name = providers_list[choice_idx - 1]
|
| 536 |
-
|
| 537 |
if Confirm.ask(f"Remove '{name}'?"):
|
| 538 |
self.provider_mgr.remove_provider(name)
|
| 539 |
-
self.console.print(
|
|
|
|
|
|
|
| 540 |
input("\nPress Enter to continue...")
|
| 541 |
-
|
| 542 |
elif choice == "4":
|
| 543 |
break
|
| 544 |
-
|
| 545 |
def manage_model_definitions(self):
|
| 546 |
"""Manage provider model definitions"""
|
| 547 |
while True:
|
| 548 |
clear_screen()
|
| 549 |
-
|
| 550 |
all_providers = self.model_mgr.get_all_providers_with_models()
|
| 551 |
-
|
| 552 |
-
self.console.print(
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
|
|
|
|
|
|
| 557 |
self.console.print()
|
| 558 |
self.console.print("[bold]📋 Configured Provider Models[/bold]")
|
| 559 |
self.console.print("━" * 70)
|
| 560 |
-
|
| 561 |
if all_providers:
|
| 562 |
for provider, count in all_providers.items():
|
| 563 |
-
self.console.print(
|
|
|
|
|
|
|
| 564 |
else:
|
| 565 |
self.console.print(" [dim]No model definitions configured[/dim]")
|
| 566 |
-
|
| 567 |
self.console.print()
|
| 568 |
self.console.print("━" * 70)
|
| 569 |
self.console.print()
|
|
@@ -574,13 +766,15 @@ class SettingsTool:
|
|
| 574 |
self.console.print(" 3. 👁️ View Provider Models")
|
| 575 |
self.console.print(" 4. 🗑️ Remove Provider Models")
|
| 576 |
self.console.print(" 5. ↩️ Back to Settings Menu")
|
| 577 |
-
|
| 578 |
self.console.print()
|
| 579 |
self.console.print("━" * 70)
|
| 580 |
self.console.print()
|
| 581 |
-
|
| 582 |
-
choice = Prompt.ask(
|
| 583 |
-
|
|
|
|
|
|
|
| 584 |
if choice == "1":
|
| 585 |
self.add_model_definitions()
|
| 586 |
elif choice == "2":
|
|
@@ -600,57 +794,71 @@ class SettingsTool:
|
|
| 600 |
self.console.print("\n[yellow]No providers to remove[/yellow]")
|
| 601 |
input("\nPress Enter to continue...")
|
| 602 |
continue
|
| 603 |
-
|
| 604 |
# Show numbered list
|
| 605 |
-
self.console.print(
|
|
|
|
|
|
|
| 606 |
providers_list = list(all_providers.keys())
|
| 607 |
for idx, prov in enumerate(providers_list, 1):
|
| 608 |
self.console.print(f" {idx}. {prov}")
|
| 609 |
-
|
| 610 |
-
choice_idx = IntPrompt.ask(
|
|
|
|
|
|
|
|
|
|
| 611 |
provider = providers_list[choice_idx - 1]
|
| 612 |
-
|
| 613 |
if Confirm.ask(f"Remove all model definitions for '{provider}'?"):
|
| 614 |
self.model_mgr.remove_models(provider)
|
| 615 |
-
self.console.print(
|
|
|
|
|
|
|
| 616 |
input("\nPress Enter to continue...")
|
| 617 |
elif choice == "5":
|
| 618 |
break
|
| 619 |
-
|
| 620 |
def add_model_definitions(self):
|
| 621 |
"""Add model definitions for a provider"""
|
| 622 |
# Get available providers from credentials
|
| 623 |
available_providers = self.get_available_providers()
|
| 624 |
-
|
| 625 |
if not available_providers:
|
| 626 |
-
self.console.print(
|
|
|
|
|
|
|
| 627 |
input("\nPress Enter to continue...")
|
| 628 |
return
|
| 629 |
-
|
| 630 |
# Show provider selection menu
|
| 631 |
self.console.print("\n[bold]Select provider:[/bold]")
|
| 632 |
for idx, prov in enumerate(available_providers, 1):
|
| 633 |
self.console.print(f" {idx}. {prov}")
|
| 634 |
-
self.console.print(
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
if choice == len(available_providers) + 1:
|
| 639 |
provider = Prompt.ask("Provider name").strip().lower()
|
| 640 |
else:
|
| 641 |
provider = available_providers[choice - 1]
|
| 642 |
-
|
| 643 |
if not provider:
|
| 644 |
return
|
| 645 |
-
|
| 646 |
self.console.print("\nHow would you like to define models?")
|
| 647 |
self.console.print(" 1. Simple list (names only)")
|
| 648 |
self.console.print(" 2. Advanced (names with IDs and options)")
|
| 649 |
-
|
| 650 |
mode = Prompt.ask("Select mode", choices=["1", "2"], show_choices=False)
|
| 651 |
-
|
| 652 |
models = {}
|
| 653 |
-
|
| 654 |
if mode == "1":
|
| 655 |
# Simple mode
|
| 656 |
while True:
|
|
@@ -667,13 +875,19 @@ class SettingsTool:
|
|
| 667 |
break
|
| 668 |
if name:
|
| 669 |
model_def = {}
|
| 670 |
-
model_id = Prompt.ask(
|
|
|
|
|
|
|
| 671 |
if model_id and model_id != name:
|
| 672 |
model_def["id"] = model_id
|
| 673 |
-
|
| 674 |
# Optional: model options
|
| 675 |
-
if Confirm.ask(
|
| 676 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
options = {}
|
| 678 |
while True:
|
| 679 |
opt = Prompt.ask("Option").strip()
|
|
@@ -690,121 +904,143 @@ class SettingsTool:
|
|
| 690 |
options[key.strip()] = value
|
| 691 |
if options:
|
| 692 |
model_def["options"] = options
|
| 693 |
-
|
| 694 |
models[name] = model_def
|
| 695 |
-
|
| 696 |
if models:
|
| 697 |
self.model_mgr.set_models(provider, models)
|
| 698 |
-
self.console.print(
|
|
|
|
|
|
|
| 699 |
else:
|
| 700 |
self.console.print("\n[yellow]No models added[/yellow]")
|
| 701 |
-
|
| 702 |
input("\nPress Enter to continue...")
|
| 703 |
-
|
| 704 |
def edit_model_definitions(self, providers: List[str]):
|
| 705 |
"""Edit existing model definitions"""
|
| 706 |
# Show numbered list
|
| 707 |
self.console.print("\n[bold]Select provider to edit:[/bold]")
|
| 708 |
for idx, prov in enumerate(providers, 1):
|
| 709 |
self.console.print(f" {idx}. {prov}")
|
| 710 |
-
|
| 711 |
-
choice_idx = IntPrompt.ask(
|
|
|
|
|
|
|
| 712 |
provider = providers[choice_idx - 1]
|
| 713 |
-
|
| 714 |
current_models = self.model_mgr.get_current_provider_models(provider)
|
| 715 |
if not current_models:
|
| 716 |
self.console.print(f"\n[yellow]No models found for '{provider}'[/yellow]")
|
| 717 |
input("\nPress Enter to continue...")
|
| 718 |
return
|
| 719 |
-
|
| 720 |
# Convert to dict if list
|
| 721 |
if isinstance(current_models, list):
|
| 722 |
current_models = {m: {} for m in current_models}
|
| 723 |
-
|
| 724 |
while True:
|
| 725 |
clear_screen()
|
| 726 |
self.console.print(f"[bold]Editing models for: {provider}[/bold]\n")
|
| 727 |
self.console.print("Current models:")
|
| 728 |
for i, (name, definition) in enumerate(current_models.items(), 1):
|
| 729 |
-
model_id =
|
|
|
|
|
|
|
| 730 |
self.console.print(f" {i}. {name} (ID: {model_id})")
|
| 731 |
-
|
| 732 |
self.console.print("\nOptions:")
|
| 733 |
self.console.print(" 1. Add new model")
|
| 734 |
self.console.print(" 2. Edit existing model")
|
| 735 |
self.console.print(" 3. Remove model")
|
| 736 |
self.console.print(" 4. Done")
|
| 737 |
-
|
| 738 |
-
choice = Prompt.ask(
|
| 739 |
-
|
|
|
|
|
|
|
| 740 |
if choice == "1":
|
| 741 |
name = Prompt.ask("New model name").strip()
|
| 742 |
if name and name not in current_models:
|
| 743 |
model_id = Prompt.ask("Model ID", default=name).strip()
|
| 744 |
current_models[name] = {"id": model_id} if model_id != name else {}
|
| 745 |
-
|
| 746 |
elif choice == "2":
|
| 747 |
# Show numbered list
|
| 748 |
models_list = list(current_models.keys())
|
| 749 |
self.console.print("\n[bold]Select model to edit:[/bold]")
|
| 750 |
for idx, model_name in enumerate(models_list, 1):
|
| 751 |
self.console.print(f" {idx}. {model_name}")
|
| 752 |
-
|
| 753 |
-
model_idx = IntPrompt.ask(
|
|
|
|
|
|
|
|
|
|
| 754 |
name = models_list[model_idx - 1]
|
| 755 |
-
|
| 756 |
current_def = current_models[name]
|
| 757 |
-
current_id =
|
| 758 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 759 |
new_id = Prompt.ask("Model ID", default=current_id).strip()
|
| 760 |
current_models[name] = {"id": new_id} if new_id != name else {}
|
| 761 |
-
|
| 762 |
elif choice == "3":
|
| 763 |
# Show numbered list
|
| 764 |
models_list = list(current_models.keys())
|
| 765 |
self.console.print("\n[bold]Select model to remove:[/bold]")
|
| 766 |
for idx, model_name in enumerate(models_list, 1):
|
| 767 |
self.console.print(f" {idx}. {model_name}")
|
| 768 |
-
|
| 769 |
-
model_idx = IntPrompt.ask(
|
|
|
|
|
|
|
|
|
|
| 770 |
name = models_list[model_idx - 1]
|
| 771 |
-
|
| 772 |
if Confirm.ask(f"Remove '{name}'?"):
|
| 773 |
del current_models[name]
|
| 774 |
-
|
| 775 |
elif choice == "4":
|
| 776 |
break
|
| 777 |
-
|
| 778 |
if current_models:
|
| 779 |
self.model_mgr.set_models(provider, current_models)
|
| 780 |
self.console.print(f"\n[green]✅ Models updated for '{provider}'![/green]")
|
| 781 |
else:
|
| 782 |
-
self.console.print(
|
|
|
|
|
|
|
| 783 |
self.model_mgr.remove_models(provider)
|
| 784 |
-
|
| 785 |
input("\nPress Enter to continue...")
|
| 786 |
-
|
| 787 |
def view_model_definitions(self, providers: List[str]):
|
| 788 |
"""View model definitions for a provider"""
|
| 789 |
# Show numbered list
|
| 790 |
self.console.print("\n[bold]Select provider to view:[/bold]")
|
| 791 |
for idx, prov in enumerate(providers, 1):
|
| 792 |
self.console.print(f" {idx}. {prov}")
|
| 793 |
-
|
| 794 |
-
choice_idx = IntPrompt.ask(
|
|
|
|
|
|
|
| 795 |
provider = providers[choice_idx - 1]
|
| 796 |
-
|
| 797 |
models = self.model_mgr.get_current_provider_models(provider)
|
| 798 |
if not models:
|
| 799 |
self.console.print(f"\n[yellow]No models found for '{provider}'[/yellow]")
|
| 800 |
input("\nPress Enter to continue...")
|
| 801 |
return
|
| 802 |
-
|
| 803 |
clear_screen()
|
| 804 |
self.console.print(f"[bold]Provider: {provider}[/bold]\n")
|
| 805 |
self.console.print("[bold]📦 Configured Models:[/bold]")
|
| 806 |
self.console.print("━" * 50)
|
| 807 |
-
|
| 808 |
# Handle both dict and list formats
|
| 809 |
if isinstance(models, dict):
|
| 810 |
for name, definition in models.items():
|
|
@@ -822,74 +1058,88 @@ class SettingsTool:
|
|
| 822 |
for name in models:
|
| 823 |
self.console.print(f" Name: {name}")
|
| 824 |
self.console.print()
|
| 825 |
-
|
| 826 |
input("Press Enter to return...")
|
| 827 |
-
|
| 828 |
def manage_provider_settings(self):
|
| 829 |
"""Manage provider-specific settings (Antigravity, Gemini CLI)"""
|
| 830 |
while True:
|
| 831 |
clear_screen()
|
| 832 |
-
|
| 833 |
available_providers = self.provider_settings_mgr.get_available_providers()
|
| 834 |
-
|
| 835 |
-
self.console.print(
|
| 836 |
-
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
|
|
|
|
|
|
| 840 |
self.console.print()
|
| 841 |
-
self.console.print(
|
|
|
|
|
|
|
| 842 |
self.console.print("━" * 70)
|
| 843 |
-
|
| 844 |
for provider in available_providers:
|
| 845 |
modified = self.provider_settings_mgr.get_modified_settings(provider)
|
| 846 |
-
status =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 847 |
display_name = provider.replace("_", " ").title()
|
| 848 |
self.console.print(f" • {display_name:20} {status}")
|
| 849 |
-
|
| 850 |
self.console.print()
|
| 851 |
self.console.print("━" * 70)
|
| 852 |
self.console.print()
|
| 853 |
self.console.print("[bold]⚙️ Select Provider to Configure[/bold]")
|
| 854 |
self.console.print()
|
| 855 |
-
|
| 856 |
for idx, provider in enumerate(available_providers, 1):
|
| 857 |
display_name = provider.replace("_", " ").title()
|
| 858 |
self.console.print(f" {idx}. {display_name}")
|
| 859 |
-
self.console.print(
|
| 860 |
-
|
|
|
|
|
|
|
| 861 |
self.console.print()
|
| 862 |
self.console.print("━" * 70)
|
| 863 |
self.console.print()
|
| 864 |
-
|
| 865 |
choices = [str(i) for i in range(1, len(available_providers) + 2)]
|
| 866 |
choice = Prompt.ask("Select option", choices=choices, show_choices=False)
|
| 867 |
choice_idx = int(choice)
|
| 868 |
-
|
| 869 |
if choice_idx == len(available_providers) + 1:
|
| 870 |
break
|
| 871 |
-
|
| 872 |
provider = available_providers[choice_idx - 1]
|
| 873 |
self._manage_single_provider_settings(provider)
|
| 874 |
-
|
| 875 |
def _manage_single_provider_settings(self, provider: str):
|
| 876 |
"""Manage settings for a single provider"""
|
| 877 |
while True:
|
| 878 |
clear_screen()
|
| 879 |
-
|
| 880 |
display_name = provider.replace("_", " ").title()
|
| 881 |
-
definitions = self.provider_settings_mgr.get_provider_settings_definitions(
|
|
|
|
|
|
|
| 882 |
current_values = self.provider_settings_mgr.get_all_current_values(provider)
|
| 883 |
-
|
| 884 |
-
self.console.print(
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
|
|
|
|
|
|
| 889 |
self.console.print()
|
| 890 |
self.console.print("[bold]📋 Current Settings[/bold]")
|
| 891 |
self.console.print("━" * 70)
|
| 892 |
-
|
| 893 |
# Display all settings with current values
|
| 894 |
settings_list = list(definitions.keys())
|
| 895 |
for idx, key in enumerate(settings_list, 1):
|
|
@@ -898,25 +1148,35 @@ class SettingsTool:
|
|
| 898 |
default = definition.get("default")
|
| 899 |
setting_type = definition.get("type", "str")
|
| 900 |
description = definition.get("description", "")
|
| 901 |
-
|
| 902 |
# Format value display
|
| 903 |
if setting_type == "bool":
|
| 904 |
-
value_display =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 905 |
elif setting_type == "int":
|
| 906 |
value_display = f"[cyan]{current}[/cyan]"
|
| 907 |
else:
|
| 908 |
-
value_display =
|
| 909 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 910 |
# Check if modified from default
|
| 911 |
modified = current != default
|
| 912 |
mod_marker = "[yellow]*[/yellow]" if modified else " "
|
| 913 |
-
|
| 914 |
# Short key name for display (strip provider prefix)
|
| 915 |
short_key = key.replace(f"{provider.upper()}_", "")
|
| 916 |
-
|
| 917 |
-
self.console.print(
|
|
|
|
|
|
|
| 918 |
self.console.print(f" [dim]{description}[/dim]")
|
| 919 |
-
|
| 920 |
self.console.print()
|
| 921 |
self.console.print("━" * 70)
|
| 922 |
self.console.print("[dim]* = modified from default[/dim]")
|
|
@@ -927,13 +1187,17 @@ class SettingsTool:
|
|
| 927 |
self.console.print(" R. 🔄 Reset Setting to Default")
|
| 928 |
self.console.print(" A. 🔄 Reset All to Defaults")
|
| 929 |
self.console.print(" B. ↩️ Back to Provider Selection")
|
| 930 |
-
|
| 931 |
self.console.print()
|
| 932 |
self.console.print("━" * 70)
|
| 933 |
self.console.print()
|
| 934 |
-
|
| 935 |
-
choice = Prompt.ask(
|
| 936 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 937 |
if choice == "b":
|
| 938 |
break
|
| 939 |
elif choice == "e":
|
|
@@ -942,26 +1206,31 @@ class SettingsTool:
|
|
| 942 |
self._reset_provider_setting(provider, settings_list, definitions)
|
| 943 |
elif choice == "a":
|
| 944 |
self._reset_all_provider_settings(provider, settings_list)
|
| 945 |
-
|
| 946 |
-
def _edit_provider_setting(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 947 |
"""Edit a single provider setting"""
|
| 948 |
self.console.print("\n[bold]Select setting number to edit:[/bold]")
|
| 949 |
-
|
| 950 |
choices = [str(i) for i in range(1, len(settings_list) + 1)]
|
| 951 |
choice = IntPrompt.ask("Setting number", choices=choices)
|
| 952 |
key = settings_list[choice - 1]
|
| 953 |
definition = definitions[key]
|
| 954 |
-
|
| 955 |
current = self.provider_settings_mgr.get_current_value(key, definition)
|
| 956 |
default = definition.get("default")
|
| 957 |
setting_type = definition.get("type", "str")
|
| 958 |
short_key = key.replace(f"{provider.upper()}_", "")
|
| 959 |
-
|
| 960 |
self.console.print(f"\n[bold]Editing: {short_key}[/bold]")
|
| 961 |
self.console.print(f"Current value: [cyan]{current}[/cyan]")
|
| 962 |
self.console.print(f"Default value: [dim]{default}[/dim]")
|
| 963 |
self.console.print(f"Type: {setting_type}")
|
| 964 |
-
|
| 965 |
if setting_type == "bool":
|
| 966 |
new_value = Confirm.ask("\nEnable this setting?", default=current)
|
| 967 |
self.provider_settings_mgr.set_value(key, new_value, definition)
|
|
@@ -972,71 +1241,415 @@ class SettingsTool:
|
|
| 972 |
self.provider_settings_mgr.set_value(key, new_value, definition)
|
| 973 |
self.console.print(f"\n[green]✅ {short_key} set to {new_value}![/green]")
|
| 974 |
else:
|
| 975 |
-
new_value = Prompt.ask(
|
|
|
|
|
|
|
| 976 |
if new_value:
|
| 977 |
self.provider_settings_mgr.set_value(key, new_value, definition)
|
| 978 |
self.console.print(f"\n[green]✅ {short_key} updated![/green]")
|
| 979 |
else:
|
| 980 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 981 |
-
|
| 982 |
input("\nPress Enter to continue...")
|
| 983 |
-
|
| 984 |
-
def _reset_provider_setting(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 985 |
"""Reset a single provider setting to default"""
|
| 986 |
self.console.print("\n[bold]Select setting number to reset:[/bold]")
|
| 987 |
-
|
| 988 |
choices = [str(i) for i in range(1, len(settings_list) + 1)]
|
| 989 |
choice = IntPrompt.ask("Setting number", choices=choices)
|
| 990 |
key = settings_list[choice - 1]
|
| 991 |
definition = definitions[key]
|
| 992 |
-
|
| 993 |
default = definition.get("default")
|
| 994 |
short_key = key.replace(f"{provider.upper()}_", "")
|
| 995 |
-
|
| 996 |
if Confirm.ask(f"\nReset {short_key} to default ({default})?"):
|
| 997 |
self.provider_settings_mgr.reset_to_default(key)
|
| 998 |
self.console.print(f"\n[green]✅ {short_key} reset to default![/green]")
|
| 999 |
else:
|
| 1000 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 1001 |
-
|
| 1002 |
input("\nPress Enter to continue...")
|
| 1003 |
-
|
| 1004 |
def _reset_all_provider_settings(self, provider: str, settings_list: List[str]):
|
| 1005 |
"""Reset all provider settings to defaults"""
|
| 1006 |
display_name = provider.replace("_", " ").title()
|
| 1007 |
-
|
| 1008 |
-
if Confirm.ask(
|
|
|
|
|
|
|
| 1009 |
for key in settings_list:
|
| 1010 |
self.provider_settings_mgr.reset_to_default(key)
|
| 1011 |
-
self.console.print(
|
|
|
|
|
|
|
| 1012 |
else:
|
| 1013 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 1014 |
-
|
| 1015 |
input("\nPress Enter to continue...")
|
| 1016 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1017 |
def manage_concurrency_limits(self):
|
| 1018 |
"""Manage concurrency limits"""
|
| 1019 |
while True:
|
| 1020 |
clear_screen()
|
| 1021 |
-
|
| 1022 |
limits = self.concurrency_mgr.get_current_limits()
|
| 1023 |
-
|
| 1024 |
-
self.console.print(
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
|
|
|
|
|
|
| 1029 |
self.console.print()
|
| 1030 |
self.console.print("[bold]📋 Current Concurrency Settings[/bold]")
|
| 1031 |
self.console.print("━" * 70)
|
| 1032 |
-
|
| 1033 |
if limits:
|
| 1034 |
for provider, limit in limits.items():
|
| 1035 |
self.console.print(f" • {provider:15} {limit} requests/key")
|
| 1036 |
self.console.print(f" • Default: 1 request/key (all others)")
|
| 1037 |
else:
|
| 1038 |
self.console.print(" • Default: 1 request/key (all providers)")
|
| 1039 |
-
|
| 1040 |
self.console.print()
|
| 1041 |
self.console.print("━" * 70)
|
| 1042 |
self.console.print()
|
|
@@ -1046,96 +1659,128 @@ class SettingsTool:
|
|
| 1046 |
self.console.print(" 2. ✏️ Edit Existing Limit")
|
| 1047 |
self.console.print(" 3. 🗑️ Remove Limit (reset to default)")
|
| 1048 |
self.console.print(" 4. ↩️ Back to Settings Menu")
|
| 1049 |
-
|
| 1050 |
self.console.print()
|
| 1051 |
self.console.print("━" * 70)
|
| 1052 |
self.console.print()
|
| 1053 |
-
|
| 1054 |
-
choice = Prompt.ask(
|
| 1055 |
-
|
|
|
|
|
|
|
| 1056 |
if choice == "1":
|
| 1057 |
# Get available providers
|
| 1058 |
available_providers = self.get_available_providers()
|
| 1059 |
-
|
| 1060 |
if not available_providers:
|
| 1061 |
-
self.console.print(
|
|
|
|
|
|
|
| 1062 |
input("\nPress Enter to continue...")
|
| 1063 |
continue
|
| 1064 |
-
|
| 1065 |
# Show provider selection menu
|
| 1066 |
self.console.print("\n[bold]Select provider:[/bold]")
|
| 1067 |
for idx, prov in enumerate(available_providers, 1):
|
| 1068 |
self.console.print(f" {idx}. {prov}")
|
| 1069 |
-
self.console.print(
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1073 |
if choice_idx == len(available_providers) + 1:
|
| 1074 |
provider = Prompt.ask("Provider name").strip().lower()
|
| 1075 |
else:
|
| 1076 |
provider = available_providers[choice_idx - 1]
|
| 1077 |
-
|
| 1078 |
if provider:
|
| 1079 |
-
limit = IntPrompt.ask(
|
|
|
|
|
|
|
| 1080 |
if 1 <= limit <= 100:
|
| 1081 |
self.concurrency_mgr.set_limit(provider, limit)
|
| 1082 |
-
self.console.print(
|
|
|
|
|
|
|
| 1083 |
else:
|
| 1084 |
-
self.console.print(
|
|
|
|
|
|
|
| 1085 |
input("\nPress Enter to continue...")
|
| 1086 |
-
|
| 1087 |
elif choice == "2":
|
| 1088 |
if not limits:
|
| 1089 |
self.console.print("\n[yellow]No limits to edit[/yellow]")
|
| 1090 |
input("\nPress Enter to continue...")
|
| 1091 |
continue
|
| 1092 |
-
|
| 1093 |
# Show numbered list
|
| 1094 |
self.console.print("\n[bold]Select provider to edit:[/bold]")
|
| 1095 |
limits_list = list(limits.keys())
|
| 1096 |
for idx, prov in enumerate(limits_list, 1):
|
| 1097 |
self.console.print(f" {idx}. {prov}")
|
| 1098 |
-
|
| 1099 |
-
choice_idx = IntPrompt.ask(
|
|
|
|
|
|
|
|
|
|
| 1100 |
provider = limits_list[choice_idx - 1]
|
| 1101 |
current_limit = limits.get(provider, 1)
|
| 1102 |
-
|
| 1103 |
self.console.print(f"\nCurrent limit: {current_limit} requests/key")
|
| 1104 |
-
new_limit = IntPrompt.ask(
|
| 1105 |
-
|
|
|
|
|
|
|
|
|
|
| 1106 |
if 1 <= new_limit <= 100:
|
| 1107 |
if new_limit != current_limit:
|
| 1108 |
self.concurrency_mgr.set_limit(provider, new_limit)
|
| 1109 |
-
self.console.print(
|
|
|
|
|
|
|
| 1110 |
else:
|
| 1111 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 1112 |
else:
|
| 1113 |
self.console.print("\n[red]Limit must be between 1-100[/red]")
|
| 1114 |
input("\nPress Enter to continue...")
|
| 1115 |
-
|
| 1116 |
elif choice == "3":
|
| 1117 |
if not limits:
|
| 1118 |
self.console.print("\n[yellow]No limits to remove[/yellow]")
|
| 1119 |
input("\nPress Enter to continue...")
|
| 1120 |
continue
|
| 1121 |
-
|
| 1122 |
# Show numbered list
|
| 1123 |
-
self.console.print(
|
|
|
|
|
|
|
| 1124 |
limits_list = list(limits.keys())
|
| 1125 |
for idx, prov in enumerate(limits_list, 1):
|
| 1126 |
self.console.print(f" {idx}. {prov}")
|
| 1127 |
-
|
| 1128 |
-
choice_idx = IntPrompt.ask(
|
|
|
|
|
|
|
|
|
|
| 1129 |
provider = limits_list[choice_idx - 1]
|
| 1130 |
-
|
| 1131 |
-
if Confirm.ask(
|
|
|
|
|
|
|
| 1132 |
self.concurrency_mgr.remove_limit(provider)
|
| 1133 |
-
self.console.print(
|
|
|
|
|
|
|
| 1134 |
input("\nPress Enter to continue...")
|
| 1135 |
-
|
| 1136 |
elif choice == "4":
|
| 1137 |
break
|
| 1138 |
-
|
| 1139 |
def save_and_exit(self):
|
| 1140 |
"""Save pending changes and exit"""
|
| 1141 |
if self.settings.has_pending():
|
|
@@ -1150,9 +1795,9 @@ class SettingsTool:
|
|
| 1150 |
else:
|
| 1151 |
self.console.print("\n[dim]No changes to save[/dim]")
|
| 1152 |
input("\nPress Enter to return to launcher...")
|
| 1153 |
-
|
| 1154 |
self.running = False
|
| 1155 |
-
|
| 1156 |
def exit_without_saving(self):
|
| 1157 |
"""Exit without saving"""
|
| 1158 |
if self.settings.has_pending():
|
|
|
|
| 17 |
|
| 18 |
def clear_screen():
|
| 19 |
"""
|
| 20 |
+
Cross-platform terminal clear that works robustly on both
|
| 21 |
classic Windows conhost and modern terminals (Windows Terminal, Linux, Mac).
|
| 22 |
+
|
| 23 |
Uses native OS commands instead of ANSI escape sequences:
|
| 24 |
- Windows (conhost & Windows Terminal): cls
|
| 25 |
- Unix-like systems (Linux, Mac): clear
|
| 26 |
"""
|
| 27 |
+
os.system("cls" if os.name == "nt" else "clear")
|
| 28 |
|
| 29 |
|
| 30 |
class AdvancedSettings:
|
| 31 |
"""Manages pending changes to .env"""
|
| 32 |
+
|
| 33 |
def __init__(self):
|
| 34 |
self.env_file = Path.cwd() / ".env"
|
| 35 |
self.pending_changes = {} # key -> value (None means delete)
|
| 36 |
self.load_current_settings()
|
| 37 |
+
|
| 38 |
def load_current_settings(self):
|
| 39 |
"""Load current .env values into env vars"""
|
| 40 |
from dotenv import load_dotenv
|
| 41 |
+
|
| 42 |
load_dotenv(override=True)
|
| 43 |
+
|
| 44 |
def set(self, key: str, value: str):
|
| 45 |
"""Stage a change"""
|
| 46 |
self.pending_changes[key] = value
|
| 47 |
+
|
| 48 |
def remove(self, key: str):
|
| 49 |
"""Stage a removal"""
|
| 50 |
self.pending_changes[key] = None
|
| 51 |
+
|
| 52 |
def save(self):
|
| 53 |
"""Write pending changes to .env"""
|
| 54 |
for key, value in self.pending_changes.items():
|
|
|
|
| 58 |
else:
|
| 59 |
# Set key
|
| 60 |
set_key(str(self.env_file), key, value)
|
| 61 |
+
|
| 62 |
self.pending_changes.clear()
|
| 63 |
self.load_current_settings()
|
| 64 |
+
|
| 65 |
def discard(self):
|
| 66 |
"""Discard pending changes"""
|
| 67 |
self.pending_changes.clear()
|
| 68 |
+
|
| 69 |
def has_pending(self) -> bool:
|
| 70 |
"""Check if there are pending changes"""
|
| 71 |
return bool(self.pending_changes)
|
|
|
|
| 73 |
|
| 74 |
class CustomProviderManager:
|
| 75 |
"""Manages custom provider API bases"""
|
| 76 |
+
|
| 77 |
def __init__(self, settings: AdvancedSettings):
|
| 78 |
self.settings = settings
|
| 79 |
+
|
| 80 |
def get_current_providers(self) -> Dict[str, str]:
|
| 81 |
"""Get currently configured custom providers"""
|
| 82 |
from proxy_app.provider_urls import PROVIDER_URL_MAP
|
| 83 |
+
|
| 84 |
providers = {}
|
| 85 |
for key, value in os.environ.items():
|
| 86 |
if key.endswith("_API_BASE"):
|
|
|
|
| 89 |
if provider not in PROVIDER_URL_MAP:
|
| 90 |
providers[provider] = value
|
| 91 |
return providers
|
| 92 |
+
|
| 93 |
def add_provider(self, name: str, api_base: str):
|
| 94 |
"""Add PROVIDER_API_BASE"""
|
| 95 |
key = f"{name.upper()}_API_BASE"
|
| 96 |
self.settings.set(key, api_base)
|
| 97 |
+
|
| 98 |
def edit_provider(self, name: str, api_base: str):
|
| 99 |
"""Edit PROVIDER_API_BASE"""
|
| 100 |
self.add_provider(name, api_base)
|
| 101 |
+
|
| 102 |
def remove_provider(self, name: str):
|
| 103 |
"""Remove PROVIDER_API_BASE"""
|
| 104 |
key = f"{name.upper()}_API_BASE"
|
|
|
|
| 107 |
|
| 108 |
class ModelDefinitionManager:
|
| 109 |
"""Manages PROVIDER_MODELS"""
|
| 110 |
+
|
| 111 |
def __init__(self, settings: AdvancedSettings):
|
| 112 |
self.settings = settings
|
| 113 |
+
|
| 114 |
def get_current_provider_models(self, provider: str) -> Optional[Dict]:
|
| 115 |
"""Get currently configured models for a provider"""
|
| 116 |
key = f"{provider.upper()}_MODELS"
|
|
|
|
| 121 |
except (json.JSONDecodeError, ValueError):
|
| 122 |
return None
|
| 123 |
return None
|
| 124 |
+
|
| 125 |
def get_all_providers_with_models(self) -> Dict[str, int]:
|
| 126 |
"""Get all providers with model definitions"""
|
| 127 |
providers = {}
|
|
|
|
| 137 |
except (json.JSONDecodeError, ValueError):
|
| 138 |
pass
|
| 139 |
return providers
|
| 140 |
+
|
| 141 |
def set_models(self, provider: str, models: Dict[str, Dict[str, Any]]):
|
| 142 |
"""Set PROVIDER_MODELS"""
|
| 143 |
key = f"{provider.upper()}_MODELS"
|
| 144 |
value = json.dumps(models)
|
| 145 |
self.settings.set(key, value)
|
| 146 |
+
|
| 147 |
def remove_models(self, provider: str):
|
| 148 |
"""Remove PROVIDER_MODELS"""
|
| 149 |
key = f"{provider.upper()}_MODELS"
|
|
|
|
| 152 |
|
| 153 |
class ConcurrencyManager:
|
| 154 |
"""Manages MAX_CONCURRENT_REQUESTS_PER_KEY_PROVIDER"""
|
| 155 |
+
|
| 156 |
def __init__(self, settings: AdvancedSettings):
|
| 157 |
self.settings = settings
|
| 158 |
+
|
| 159 |
def get_current_limits(self) -> Dict[str, int]:
|
| 160 |
"""Get currently configured concurrency limits"""
|
| 161 |
limits = {}
|
|
|
|
| 167 |
except (json.JSONDecodeError, ValueError):
|
| 168 |
pass
|
| 169 |
return limits
|
| 170 |
+
|
| 171 |
def set_limit(self, provider: str, limit: int):
|
| 172 |
"""Set concurrency limit"""
|
| 173 |
key = f"MAX_CONCURRENT_REQUESTS_PER_KEY_{provider.upper()}"
|
| 174 |
self.settings.set(key, str(limit))
|
| 175 |
+
|
| 176 |
def remove_limit(self, provider: str):
|
| 177 |
"""Remove concurrency limit (reset to default)"""
|
| 178 |
key = f"MAX_CONCURRENT_REQUESTS_PER_KEY_{provider.upper()}"
|
| 179 |
self.settings.remove(key)
|
| 180 |
|
| 181 |
|
| 182 |
+
class RotationModeManager:
|
| 183 |
+
"""Manages ROTATION_MODE_PROVIDER settings for sequential/balanced credential rotation"""
|
| 184 |
+
|
| 185 |
+
VALID_MODES = ["balanced", "sequential"]
|
| 186 |
+
|
| 187 |
+
def __init__(self, settings: AdvancedSettings):
|
| 188 |
+
self.settings = settings
|
| 189 |
+
|
| 190 |
+
def get_current_modes(self) -> Dict[str, str]:
|
| 191 |
+
"""Get currently configured rotation modes"""
|
| 192 |
+
modes = {}
|
| 193 |
+
for key, value in os.environ.items():
|
| 194 |
+
if key.startswith("ROTATION_MODE_"):
|
| 195 |
+
provider = key.replace("ROTATION_MODE_", "").lower()
|
| 196 |
+
if value.lower() in self.VALID_MODES:
|
| 197 |
+
modes[provider] = value.lower()
|
| 198 |
+
return modes
|
| 199 |
+
|
| 200 |
+
def get_default_mode(self, provider: str) -> str:
|
| 201 |
+
"""Get the default rotation mode for a provider"""
|
| 202 |
+
try:
|
| 203 |
+
from rotator_library.providers import PROVIDER_PLUGINS
|
| 204 |
+
|
| 205 |
+
provider_class = PROVIDER_PLUGINS.get(provider.lower())
|
| 206 |
+
if provider_class and hasattr(provider_class, "default_rotation_mode"):
|
| 207 |
+
return provider_class.default_rotation_mode
|
| 208 |
+
return "balanced"
|
| 209 |
+
except ImportError:
|
| 210 |
+
# Fallback defaults if import fails
|
| 211 |
+
if provider.lower() == "antigravity":
|
| 212 |
+
return "sequential"
|
| 213 |
+
return "balanced"
|
| 214 |
+
|
| 215 |
+
def get_effective_mode(self, provider: str) -> str:
|
| 216 |
+
"""Get the effective rotation mode (configured or default)"""
|
| 217 |
+
configured = self.get_current_modes().get(provider.lower())
|
| 218 |
+
if configured:
|
| 219 |
+
return configured
|
| 220 |
+
return self.get_default_mode(provider)
|
| 221 |
+
|
| 222 |
+
def set_mode(self, provider: str, mode: str):
|
| 223 |
+
"""Set rotation mode for a provider"""
|
| 224 |
+
if mode.lower() not in self.VALID_MODES:
|
| 225 |
+
raise ValueError(
|
| 226 |
+
f"Invalid rotation mode: {mode}. Must be one of {self.VALID_MODES}"
|
| 227 |
+
)
|
| 228 |
+
key = f"ROTATION_MODE_{provider.upper()}"
|
| 229 |
+
self.settings.set(key, mode.lower())
|
| 230 |
+
|
| 231 |
+
def remove_mode(self, provider: str):
|
| 232 |
+
"""Remove rotation mode (reset to provider default)"""
|
| 233 |
+
key = f"ROTATION_MODE_{provider.upper()}"
|
| 234 |
+
self.settings.remove(key)
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
class PriorityMultiplierManager:
|
| 238 |
+
"""Manages CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N> settings"""
|
| 239 |
+
|
| 240 |
+
def __init__(self, settings: AdvancedSettings):
|
| 241 |
+
self.settings = settings
|
| 242 |
+
|
| 243 |
+
def get_provider_defaults(self, provider: str) -> Dict[int, int]:
|
| 244 |
+
"""Get default priority multipliers from provider class"""
|
| 245 |
+
try:
|
| 246 |
+
from rotator_library.providers import PROVIDER_PLUGINS
|
| 247 |
+
|
| 248 |
+
provider_class = PROVIDER_PLUGINS.get(provider.lower())
|
| 249 |
+
if provider_class and hasattr(
|
| 250 |
+
provider_class, "default_priority_multipliers"
|
| 251 |
+
):
|
| 252 |
+
return dict(provider_class.default_priority_multipliers)
|
| 253 |
+
except ImportError:
|
| 254 |
+
pass
|
| 255 |
+
return {}
|
| 256 |
+
|
| 257 |
+
def get_sequential_fallback(self, provider: str) -> int:
|
| 258 |
+
"""Get sequential fallback multiplier from provider class"""
|
| 259 |
+
try:
|
| 260 |
+
from rotator_library.providers import PROVIDER_PLUGINS
|
| 261 |
+
|
| 262 |
+
provider_class = PROVIDER_PLUGINS.get(provider.lower())
|
| 263 |
+
if provider_class and hasattr(
|
| 264 |
+
provider_class, "default_sequential_fallback_multiplier"
|
| 265 |
+
):
|
| 266 |
+
return provider_class.default_sequential_fallback_multiplier
|
| 267 |
+
except ImportError:
|
| 268 |
+
pass
|
| 269 |
+
return 1
|
| 270 |
+
|
| 271 |
+
def get_current_multipliers(self) -> Dict[str, Dict[int, int]]:
|
| 272 |
+
"""Get currently configured priority multipliers from env vars"""
|
| 273 |
+
multipliers: Dict[str, Dict[int, int]] = {}
|
| 274 |
+
for key, value in os.environ.items():
|
| 275 |
+
if key.startswith("CONCURRENCY_MULTIPLIER_") and "_PRIORITY_" in key:
|
| 276 |
+
try:
|
| 277 |
+
# Parse: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>
|
| 278 |
+
parts = key.split("_PRIORITY_")
|
| 279 |
+
provider = parts[0].replace("CONCURRENCY_MULTIPLIER_", "").lower()
|
| 280 |
+
remainder = parts[1]
|
| 281 |
+
|
| 282 |
+
# Check if mode-specific (has _SEQUENTIAL or _BALANCED suffix)
|
| 283 |
+
if "_" in remainder:
|
| 284 |
+
continue # Skip mode-specific for now (show in separate view)
|
| 285 |
+
|
| 286 |
+
priority = int(remainder)
|
| 287 |
+
multiplier = int(value)
|
| 288 |
+
|
| 289 |
+
if provider not in multipliers:
|
| 290 |
+
multipliers[provider] = {}
|
| 291 |
+
multipliers[provider][priority] = multiplier
|
| 292 |
+
except (ValueError, IndexError):
|
| 293 |
+
pass
|
| 294 |
+
return multipliers
|
| 295 |
+
|
| 296 |
+
def get_effective_multiplier(self, provider: str, priority: int) -> int:
|
| 297 |
+
"""Get effective multiplier (configured, provider default, or 1)"""
|
| 298 |
+
# Check env var override
|
| 299 |
+
current = self.get_current_multipliers()
|
| 300 |
+
if provider.lower() in current:
|
| 301 |
+
if priority in current[provider.lower()]:
|
| 302 |
+
return current[provider.lower()][priority]
|
| 303 |
+
|
| 304 |
+
# Check provider defaults
|
| 305 |
+
defaults = self.get_provider_defaults(provider)
|
| 306 |
+
if priority in defaults:
|
| 307 |
+
return defaults[priority]
|
| 308 |
+
|
| 309 |
+
# Return 1 (no multiplier)
|
| 310 |
+
return 1
|
| 311 |
+
|
| 312 |
+
def set_multiplier(self, provider: str, priority: int, multiplier: int):
|
| 313 |
+
"""Set priority multiplier for a provider"""
|
| 314 |
+
if multiplier < 1:
|
| 315 |
+
raise ValueError("Multiplier must be >= 1")
|
| 316 |
+
key = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_{priority}"
|
| 317 |
+
self.settings.set(key, str(multiplier))
|
| 318 |
+
|
| 319 |
+
def remove_multiplier(self, provider: str, priority: int):
|
| 320 |
+
"""Remove multiplier (reset to provider default)"""
|
| 321 |
+
key = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_{priority}"
|
| 322 |
+
self.settings.remove(key)
|
| 323 |
+
|
| 324 |
+
|
| 325 |
# =============================================================================
|
| 326 |
# PROVIDER-SPECIFIC SETTINGS DEFINITIONS
|
| 327 |
# =============================================================================
|
|
|
|
| 438 |
|
| 439 |
class ProviderSettingsManager:
|
| 440 |
"""Manages provider-specific configuration settings"""
|
| 441 |
+
|
| 442 |
def __init__(self, settings: AdvancedSettings):
|
| 443 |
self.settings = settings
|
| 444 |
+
|
| 445 |
def get_available_providers(self) -> List[str]:
|
| 446 |
"""Get list of providers with specific settings available"""
|
| 447 |
return list(PROVIDER_SETTINGS_MAP.keys())
|
| 448 |
+
|
| 449 |
+
def get_provider_settings_definitions(
|
| 450 |
+
self, provider: str
|
| 451 |
+
) -> Dict[str, Dict[str, Any]]:
|
| 452 |
"""Get settings definitions for a provider"""
|
| 453 |
return PROVIDER_SETTINGS_MAP.get(provider, {})
|
| 454 |
+
|
| 455 |
def get_current_value(self, key: str, definition: Dict[str, Any]) -> Any:
|
| 456 |
"""Get current value of a setting from environment"""
|
| 457 |
env_value = os.getenv(key)
|
| 458 |
if env_value is None:
|
| 459 |
return definition.get("default")
|
| 460 |
+
|
| 461 |
setting_type = definition.get("type", "str")
|
| 462 |
try:
|
| 463 |
if setting_type == "bool":
|
|
|
|
| 468 |
return env_value
|
| 469 |
except (ValueError, AttributeError):
|
| 470 |
return definition.get("default")
|
| 471 |
+
|
| 472 |
def get_all_current_values(self, provider: str) -> Dict[str, Any]:
|
| 473 |
"""Get all current values for a provider"""
|
| 474 |
definitions = self.get_provider_settings_definitions(provider)
|
|
|
|
| 476 |
for key, definition in definitions.items():
|
| 477 |
values[key] = self.get_current_value(key, definition)
|
| 478 |
return values
|
| 479 |
+
|
| 480 |
def set_value(self, key: str, value: Any, definition: Dict[str, Any]):
|
| 481 |
"""Set a setting value, converting to string for .env storage"""
|
| 482 |
setting_type = definition.get("type", "str")
|
|
|
|
| 485 |
else:
|
| 486 |
str_value = str(value)
|
| 487 |
self.settings.set(key, str_value)
|
| 488 |
+
|
| 489 |
def reset_to_default(self, key: str):
|
| 490 |
"""Remove a setting to reset it to default"""
|
| 491 |
self.settings.remove(key)
|
| 492 |
+
|
| 493 |
def get_modified_settings(self, provider: str) -> Dict[str, Any]:
|
| 494 |
"""Get settings that differ from defaults"""
|
| 495 |
definitions = self.get_provider_settings_definitions(provider)
|
|
|
|
| 504 |
|
| 505 |
class SettingsTool:
|
| 506 |
"""Main settings tool TUI"""
|
| 507 |
+
|
| 508 |
def __init__(self):
|
| 509 |
self.console = Console()
|
| 510 |
self.settings = AdvancedSettings()
|
| 511 |
self.provider_mgr = CustomProviderManager(self.settings)
|
| 512 |
self.model_mgr = ModelDefinitionManager(self.settings)
|
| 513 |
self.concurrency_mgr = ConcurrencyManager(self.settings)
|
| 514 |
+
self.rotation_mgr = RotationModeManager(self.settings)
|
| 515 |
+
self.priority_multiplier_mgr = PriorityMultiplierManager(self.settings)
|
| 516 |
self.provider_settings_mgr = ProviderSettingsManager(self.settings)
|
| 517 |
self.running = True
|
| 518 |
+
|
| 519 |
def get_available_providers(self) -> List[str]:
|
| 520 |
"""Get list of providers that have credentials configured"""
|
| 521 |
env_file = Path.cwd() / ".env"
|
| 522 |
providers = set()
|
| 523 |
+
|
| 524 |
# Scan for providers with API keys from local .env
|
| 525 |
if env_file.exists():
|
| 526 |
try:
|
| 527 |
+
with open(env_file, "r", encoding="utf-8") as f:
|
| 528 |
for line in f:
|
| 529 |
line = line.strip()
|
| 530 |
+
# Skip comments and empty lines
|
| 531 |
+
if not line or line.startswith("#"):
|
| 532 |
+
continue
|
| 533 |
+
if (
|
| 534 |
+
"_API_KEY" in line
|
| 535 |
+
and "PROXY_API_KEY" not in line
|
| 536 |
+
and "=" in line
|
| 537 |
+
):
|
| 538 |
provider = line.split("_API_KEY")[0].strip().lower()
|
| 539 |
providers.add(provider)
|
| 540 |
except (IOError, OSError):
|
| 541 |
pass
|
| 542 |
+
|
| 543 |
# Also check for OAuth providers from files
|
| 544 |
+
oauth_dir = Path("oauth_creds")
|
| 545 |
if oauth_dir.exists():
|
| 546 |
for file in oauth_dir.glob("*_oauth_*.json"):
|
| 547 |
provider = file.name.split("_oauth_")[0]
|
| 548 |
providers.add(provider)
|
| 549 |
+
|
| 550 |
return sorted(list(providers))
|
| 551 |
|
| 552 |
def run(self):
|
| 553 |
"""Main loop"""
|
| 554 |
while self.running:
|
| 555 |
self.show_main_menu()
|
| 556 |
+
|
| 557 |
def show_main_menu(self):
|
| 558 |
"""Display settings categories"""
|
| 559 |
clear_screen()
|
| 560 |
+
|
| 561 |
+
self.console.print(
|
| 562 |
+
Panel.fit(
|
| 563 |
+
"[bold cyan]🔧 Advanced Settings Configuration[/bold cyan]",
|
| 564 |
+
border_style="cyan",
|
| 565 |
+
)
|
| 566 |
+
)
|
| 567 |
+
|
| 568 |
self.console.print()
|
| 569 |
self.console.print("[bold]⚙️ Configuration Categories[/bold]")
|
| 570 |
self.console.print()
|
| 571 |
self.console.print(" 1. 🌐 Custom Provider API Bases")
|
| 572 |
self.console.print(" 2. 📦 Provider Model Definitions")
|
| 573 |
self.console.print(" 3. ⚡ Concurrency Limits")
|
| 574 |
+
self.console.print(" 4. 🔄 Rotation Modes")
|
| 575 |
+
self.console.print(" 5. 🔬 Provider-Specific Settings")
|
| 576 |
+
self.console.print(" 6. 💾 Save & Exit")
|
| 577 |
+
self.console.print(" 7. 🚫 Exit Without Saving")
|
| 578 |
+
|
| 579 |
self.console.print()
|
| 580 |
self.console.print("━" * 70)
|
| 581 |
+
|
| 582 |
if self.settings.has_pending():
|
| 583 |
+
self.console.print(
|
| 584 |
+
'[yellow]ℹ️ Changes are pending until you select "Save & Exit"[/yellow]'
|
| 585 |
+
)
|
| 586 |
else:
|
| 587 |
self.console.print("[dim]ℹ️ No pending changes[/dim]")
|
| 588 |
+
|
| 589 |
self.console.print()
|
| 590 |
+
self.console.print(
|
| 591 |
+
"[dim]⚠️ Model filters not supported - edit .env for IGNORE_MODELS_* / WHITELIST_MODELS_*[/dim]"
|
| 592 |
+
)
|
| 593 |
self.console.print()
|
| 594 |
+
|
| 595 |
+
choice = Prompt.ask(
|
| 596 |
+
"Select option",
|
| 597 |
+
choices=["1", "2", "3", "4", "5", "6", "7"],
|
| 598 |
+
show_choices=False,
|
| 599 |
+
)
|
| 600 |
+
|
| 601 |
if choice == "1":
|
| 602 |
self.manage_custom_providers()
|
| 603 |
elif choice == "2":
|
|
|
|
| 605 |
elif choice == "3":
|
| 606 |
self.manage_concurrency_limits()
|
| 607 |
elif choice == "4":
|
| 608 |
+
self.manage_rotation_modes()
|
| 609 |
elif choice == "5":
|
| 610 |
+
self.manage_provider_settings()
|
| 611 |
elif choice == "6":
|
| 612 |
+
self.save_and_exit()
|
| 613 |
+
elif choice == "7":
|
| 614 |
self.exit_without_saving()
|
| 615 |
+
|
| 616 |
def manage_custom_providers(self):
|
| 617 |
"""Manage custom provider API bases"""
|
| 618 |
while True:
|
| 619 |
clear_screen()
|
| 620 |
+
|
| 621 |
providers = self.provider_mgr.get_current_providers()
|
| 622 |
+
|
| 623 |
+
self.console.print(
|
| 624 |
+
Panel.fit(
|
| 625 |
+
"[bold cyan]🌐 Custom Provider API Bases[/bold cyan]",
|
| 626 |
+
border_style="cyan",
|
| 627 |
+
)
|
| 628 |
+
)
|
| 629 |
+
|
| 630 |
self.console.print()
|
| 631 |
self.console.print("[bold]📋 Configured Custom Providers[/bold]")
|
| 632 |
self.console.print("━" * 70)
|
| 633 |
+
|
| 634 |
if providers:
|
| 635 |
for name, base in providers.items():
|
| 636 |
self.console.print(f" • {name:15} {base}")
|
| 637 |
else:
|
| 638 |
self.console.print(" [dim]No custom providers configured[/dim]")
|
| 639 |
+
|
| 640 |
self.console.print()
|
| 641 |
self.console.print("━" * 70)
|
| 642 |
self.console.print()
|
|
|
|
| 646 |
self.console.print(" 2. ✏️ Edit Existing Provider")
|
| 647 |
self.console.print(" 3. 🗑️ Remove Provider")
|
| 648 |
self.console.print(" 4. ↩️ Back to Settings Menu")
|
| 649 |
+
|
| 650 |
self.console.print()
|
| 651 |
self.console.print("━" * 70)
|
| 652 |
self.console.print()
|
| 653 |
+
|
| 654 |
+
choice = Prompt.ask(
|
| 655 |
+
"Select option", choices=["1", "2", "3", "4"], show_choices=False
|
| 656 |
+
)
|
| 657 |
+
|
| 658 |
if choice == "1":
|
| 659 |
name = Prompt.ask("Provider name (e.g., 'opencode')").strip().lower()
|
| 660 |
if name:
|
| 661 |
api_base = Prompt.ask("API Base URL").strip()
|
| 662 |
if api_base:
|
| 663 |
self.provider_mgr.add_provider(name, api_base)
|
| 664 |
+
self.console.print(
|
| 665 |
+
f"\n[green]✅ Custom provider '{name}' configured![/green]"
|
| 666 |
+
)
|
| 667 |
+
self.console.print(
|
| 668 |
+
f" To use: set {name.upper()}_API_KEY in credentials"
|
| 669 |
+
)
|
| 670 |
input("\nPress Enter to continue...")
|
| 671 |
+
|
| 672 |
elif choice == "2":
|
| 673 |
if not providers:
|
| 674 |
self.console.print("\n[yellow]No providers to edit[/yellow]")
|
| 675 |
input("\nPress Enter to continue...")
|
| 676 |
continue
|
| 677 |
+
|
| 678 |
# Show numbered list
|
| 679 |
self.console.print("\n[bold]Select provider to edit:[/bold]")
|
| 680 |
providers_list = list(providers.keys())
|
| 681 |
for idx, prov in enumerate(providers_list, 1):
|
| 682 |
self.console.print(f" {idx}. {prov}")
|
| 683 |
+
|
| 684 |
+
choice_idx = IntPrompt.ask(
|
| 685 |
+
"Select option",
|
| 686 |
+
choices=[str(i) for i in range(1, len(providers_list) + 1)],
|
| 687 |
+
)
|
| 688 |
name = providers_list[choice_idx - 1]
|
| 689 |
current_base = providers.get(name, "")
|
| 690 |
+
|
| 691 |
self.console.print(f"\nCurrent API Base: {current_base}")
|
| 692 |
+
new_base = Prompt.ask(
|
| 693 |
+
"New API Base [press Enter to keep current]", default=current_base
|
| 694 |
+
).strip()
|
| 695 |
+
|
| 696 |
if new_base and new_base != current_base:
|
| 697 |
self.provider_mgr.edit_provider(name, new_base)
|
| 698 |
+
self.console.print(
|
| 699 |
+
f"\n[green]✅ Custom provider '{name}' updated![/green]"
|
| 700 |
+
)
|
| 701 |
else:
|
| 702 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 703 |
input("\nPress Enter to continue...")
|
| 704 |
+
|
| 705 |
elif choice == "3":
|
| 706 |
if not providers:
|
| 707 |
self.console.print("\n[yellow]No providers to remove[/yellow]")
|
| 708 |
input("\nPress Enter to continue...")
|
| 709 |
continue
|
| 710 |
+
|
| 711 |
# Show numbered list
|
| 712 |
self.console.print("\n[bold]Select provider to remove:[/bold]")
|
| 713 |
providers_list = list(providers.keys())
|
| 714 |
for idx, prov in enumerate(providers_list, 1):
|
| 715 |
self.console.print(f" {idx}. {prov}")
|
| 716 |
+
|
| 717 |
+
choice_idx = IntPrompt.ask(
|
| 718 |
+
"Select option",
|
| 719 |
+
choices=[str(i) for i in range(1, len(providers_list) + 1)],
|
| 720 |
+
)
|
| 721 |
name = providers_list[choice_idx - 1]
|
| 722 |
+
|
| 723 |
if Confirm.ask(f"Remove '{name}'?"):
|
| 724 |
self.provider_mgr.remove_provider(name)
|
| 725 |
+
self.console.print(
|
| 726 |
+
f"\n[green]✅ Provider '{name}' removed![/green]"
|
| 727 |
+
)
|
| 728 |
input("\nPress Enter to continue...")
|
| 729 |
+
|
| 730 |
elif choice == "4":
|
| 731 |
break
|
| 732 |
+
|
| 733 |
def manage_model_definitions(self):
|
| 734 |
"""Manage provider model definitions"""
|
| 735 |
while True:
|
| 736 |
clear_screen()
|
| 737 |
+
|
| 738 |
all_providers = self.model_mgr.get_all_providers_with_models()
|
| 739 |
+
|
| 740 |
+
self.console.print(
|
| 741 |
+
Panel.fit(
|
| 742 |
+
"[bold cyan]📦 Provider Model Definitions[/bold cyan]",
|
| 743 |
+
border_style="cyan",
|
| 744 |
+
)
|
| 745 |
+
)
|
| 746 |
+
|
| 747 |
self.console.print()
|
| 748 |
self.console.print("[bold]📋 Configured Provider Models[/bold]")
|
| 749 |
self.console.print("━" * 70)
|
| 750 |
+
|
| 751 |
if all_providers:
|
| 752 |
for provider, count in all_providers.items():
|
| 753 |
+
self.console.print(
|
| 754 |
+
f" • {provider:15} {count} model{'s' if count > 1 else ''}"
|
| 755 |
+
)
|
| 756 |
else:
|
| 757 |
self.console.print(" [dim]No model definitions configured[/dim]")
|
| 758 |
+
|
| 759 |
self.console.print()
|
| 760 |
self.console.print("━" * 70)
|
| 761 |
self.console.print()
|
|
|
|
| 766 |
self.console.print(" 3. 👁️ View Provider Models")
|
| 767 |
self.console.print(" 4. 🗑️ Remove Provider Models")
|
| 768 |
self.console.print(" 5. ↩️ Back to Settings Menu")
|
| 769 |
+
|
| 770 |
self.console.print()
|
| 771 |
self.console.print("━" * 70)
|
| 772 |
self.console.print()
|
| 773 |
+
|
| 774 |
+
choice = Prompt.ask(
|
| 775 |
+
"Select option", choices=["1", "2", "3", "4", "5"], show_choices=False
|
| 776 |
+
)
|
| 777 |
+
|
| 778 |
if choice == "1":
|
| 779 |
self.add_model_definitions()
|
| 780 |
elif choice == "2":
|
|
|
|
| 794 |
self.console.print("\n[yellow]No providers to remove[/yellow]")
|
| 795 |
input("\nPress Enter to continue...")
|
| 796 |
continue
|
| 797 |
+
|
| 798 |
# Show numbered list
|
| 799 |
+
self.console.print(
|
| 800 |
+
"\n[bold]Select provider to remove models from:[/bold]"
|
| 801 |
+
)
|
| 802 |
providers_list = list(all_providers.keys())
|
| 803 |
for idx, prov in enumerate(providers_list, 1):
|
| 804 |
self.console.print(f" {idx}. {prov}")
|
| 805 |
+
|
| 806 |
+
choice_idx = IntPrompt.ask(
|
| 807 |
+
"Select option",
|
| 808 |
+
choices=[str(i) for i in range(1, len(providers_list) + 1)],
|
| 809 |
+
)
|
| 810 |
provider = providers_list[choice_idx - 1]
|
| 811 |
+
|
| 812 |
if Confirm.ask(f"Remove all model definitions for '{provider}'?"):
|
| 813 |
self.model_mgr.remove_models(provider)
|
| 814 |
+
self.console.print(
|
| 815 |
+
f"\n[green]✅ Model definitions removed for '{provider}'![/green]"
|
| 816 |
+
)
|
| 817 |
input("\nPress Enter to continue...")
|
| 818 |
elif choice == "5":
|
| 819 |
break
|
| 820 |
+
|
| 821 |
def add_model_definitions(self):
|
| 822 |
"""Add model definitions for a provider"""
|
| 823 |
# Get available providers from credentials
|
| 824 |
available_providers = self.get_available_providers()
|
| 825 |
+
|
| 826 |
if not available_providers:
|
| 827 |
+
self.console.print(
|
| 828 |
+
"\n[yellow]No providers with credentials found. Please add credentials first.[/yellow]"
|
| 829 |
+
)
|
| 830 |
input("\nPress Enter to continue...")
|
| 831 |
return
|
| 832 |
+
|
| 833 |
# Show provider selection menu
|
| 834 |
self.console.print("\n[bold]Select provider:[/bold]")
|
| 835 |
for idx, prov in enumerate(available_providers, 1):
|
| 836 |
self.console.print(f" {idx}. {prov}")
|
| 837 |
+
self.console.print(
|
| 838 |
+
f" {len(available_providers) + 1}. Enter custom provider name"
|
| 839 |
+
)
|
| 840 |
+
|
| 841 |
+
choice = IntPrompt.ask(
|
| 842 |
+
"Select option",
|
| 843 |
+
choices=[str(i) for i in range(1, len(available_providers) + 2)],
|
| 844 |
+
)
|
| 845 |
+
|
| 846 |
if choice == len(available_providers) + 1:
|
| 847 |
provider = Prompt.ask("Provider name").strip().lower()
|
| 848 |
else:
|
| 849 |
provider = available_providers[choice - 1]
|
| 850 |
+
|
| 851 |
if not provider:
|
| 852 |
return
|
| 853 |
+
|
| 854 |
self.console.print("\nHow would you like to define models?")
|
| 855 |
self.console.print(" 1. Simple list (names only)")
|
| 856 |
self.console.print(" 2. Advanced (names with IDs and options)")
|
| 857 |
+
|
| 858 |
mode = Prompt.ask("Select mode", choices=["1", "2"], show_choices=False)
|
| 859 |
+
|
| 860 |
models = {}
|
| 861 |
+
|
| 862 |
if mode == "1":
|
| 863 |
# Simple mode
|
| 864 |
while True:
|
|
|
|
| 875 |
break
|
| 876 |
if name:
|
| 877 |
model_def = {}
|
| 878 |
+
model_id = Prompt.ask(
|
| 879 |
+
f"Model ID [press Enter to use '{name}']", default=name
|
| 880 |
+
).strip()
|
| 881 |
if model_id and model_id != name:
|
| 882 |
model_def["id"] = model_id
|
| 883 |
+
|
| 884 |
# Optional: model options
|
| 885 |
+
if Confirm.ask(
|
| 886 |
+
"Add model options (e.g., temperature limits)?", default=False
|
| 887 |
+
):
|
| 888 |
+
self.console.print(
|
| 889 |
+
"\nEnter options as key=value pairs (one per line, 'done' to finish):"
|
| 890 |
+
)
|
| 891 |
options = {}
|
| 892 |
while True:
|
| 893 |
opt = Prompt.ask("Option").strip()
|
|
|
|
| 904 |
options[key.strip()] = value
|
| 905 |
if options:
|
| 906 |
model_def["options"] = options
|
| 907 |
+
|
| 908 |
models[name] = model_def
|
| 909 |
+
|
| 910 |
if models:
|
| 911 |
self.model_mgr.set_models(provider, models)
|
| 912 |
+
self.console.print(
|
| 913 |
+
f"\n[green]✅ Model definitions saved for '{provider}'![/green]"
|
| 914 |
+
)
|
| 915 |
else:
|
| 916 |
self.console.print("\n[yellow]No models added[/yellow]")
|
| 917 |
+
|
| 918 |
input("\nPress Enter to continue...")
|
| 919 |
+
|
| 920 |
def edit_model_definitions(self, providers: List[str]):
    """Interactively edit the stored model definitions for one provider.

    Args:
        providers: Provider names that currently have model definitions.

    Presents an add/edit/remove loop; on exit, persists the edited mapping
    via the model manager (or removes the definition entirely if empty).
    """
    # Show numbered list
    self.console.print("\n[bold]Select provider to edit:[/bold]")
    for idx, prov in enumerate(providers, 1):
        self.console.print(f" {idx}. {prov}")

    choice_idx = IntPrompt.ask(
        "Select option", choices=[str(i) for i in range(1, len(providers) + 1)]
    )
    provider = providers[choice_idx - 1]

    current_models = self.model_mgr.get_current_provider_models(provider)
    if not current_models:
        self.console.print(f"\n[yellow]No models found for '{provider}'[/yellow]")
        input("\nPress Enter to continue...")
        return

    # Convert to dict if list (simple-mode definitions store names only)
    if isinstance(current_models, list):
        current_models = {m: {} for m in current_models}

    while True:
        clear_screen()
        self.console.print(f"[bold]Editing models for: {provider}[/bold]\n")
        self.console.print("Current models:")
        for i, (name, definition) in enumerate(current_models.items(), 1):
            model_id = (
                definition.get("id", name) if isinstance(definition, dict) else name
            )
            self.console.print(f" {i}. {name} (ID: {model_id})")

        self.console.print("\nOptions:")
        self.console.print(" 1. Add new model")
        self.console.print(" 2. Edit existing model")
        self.console.print(" 3. Remove model")
        self.console.print(" 4. Done")

        choice = Prompt.ask(
            "\nSelect option", choices=["1", "2", "3", "4"], show_choices=False
        )

        if choice == "1":
            name = Prompt.ask("New model name").strip()
            if name and name not in current_models:
                model_id = Prompt.ask("Model ID", default=name).strip()
                current_models[name] = {"id": model_id} if model_id != name else {}

        elif choice == "2":
            # Show numbered list
            models_list = list(current_models.keys())
            self.console.print("\n[bold]Select model to edit:[/bold]")
            for idx, model_name in enumerate(models_list, 1):
                self.console.print(f" {idx}. {model_name}")

            model_idx = IntPrompt.ask(
                "Select option",
                choices=[str(i) for i in range(1, len(models_list) + 1)],
            )
            name = models_list[model_idx - 1]

            current_def = current_models[name]
            current_id = (
                current_def.get("id", name)
                if isinstance(current_def, dict)
                else name
            )

            new_id = Prompt.ask("Model ID", default=current_id).strip()
            # BUG FIX: the previous code rebuilt the definition from scratch
            # ({"id": new_id} or {}), silently dropping any existing keys such
            # as "options". Preserve the rest of the definition and only
            # update/remove the "id" entry.
            new_def = dict(current_def) if isinstance(current_def, dict) else {}
            if new_id != name:
                new_def["id"] = new_id
            else:
                new_def.pop("id", None)
            current_models[name] = new_def

        elif choice == "3":
            # Show numbered list
            models_list = list(current_models.keys())
            self.console.print("\n[bold]Select model to remove:[/bold]")
            for idx, model_name in enumerate(models_list, 1):
                self.console.print(f" {idx}. {model_name}")

            model_idx = IntPrompt.ask(
                "Select option",
                choices=[str(i) for i in range(1, len(models_list) + 1)],
            )
            name = models_list[model_idx - 1]

            if Confirm.ask(f"Remove '{name}'?"):
                del current_models[name]

        elif choice == "4":
            break

    if current_models:
        self.model_mgr.set_models(provider, current_models)
        self.console.print(f"\n[green]✅ Models updated for '{provider}'![/green]")
    else:
        self.console.print(
            "\n[yellow]No models left - removing definition[/yellow]"
        )
        self.model_mgr.remove_models(provider)

    input("\nPress Enter to continue...")
|
| 1020 |
+
|
| 1021 |
def view_model_definitions(self, providers: List[str]):
|
| 1022 |
"""View model definitions for a provider"""
|
| 1023 |
# Show numbered list
|
| 1024 |
self.console.print("\n[bold]Select provider to view:[/bold]")
|
| 1025 |
for idx, prov in enumerate(providers, 1):
|
| 1026 |
self.console.print(f" {idx}. {prov}")
|
| 1027 |
+
|
| 1028 |
+
choice_idx = IntPrompt.ask(
|
| 1029 |
+
"Select option", choices=[str(i) for i in range(1, len(providers) + 1)]
|
| 1030 |
+
)
|
| 1031 |
provider = providers[choice_idx - 1]
|
| 1032 |
+
|
| 1033 |
models = self.model_mgr.get_current_provider_models(provider)
|
| 1034 |
if not models:
|
| 1035 |
self.console.print(f"\n[yellow]No models found for '{provider}'[/yellow]")
|
| 1036 |
input("\nPress Enter to continue...")
|
| 1037 |
return
|
| 1038 |
+
|
| 1039 |
clear_screen()
|
| 1040 |
self.console.print(f"[bold]Provider: {provider}[/bold]\n")
|
| 1041 |
self.console.print("[bold]📦 Configured Models:[/bold]")
|
| 1042 |
self.console.print("━" * 50)
|
| 1043 |
+
|
| 1044 |
# Handle both dict and list formats
|
| 1045 |
if isinstance(models, dict):
|
| 1046 |
for name, definition in models.items():
|
|
|
|
| 1058 |
for name in models:
|
| 1059 |
self.console.print(f" Name: {name}")
|
| 1060 |
self.console.print()
|
| 1061 |
+
|
| 1062 |
input("Press Enter to return...")
|
| 1063 |
+
|
| 1064 |
def manage_provider_settings(self):
    """Manage provider-specific settings (Antigravity, Gemini CLI)"""
    while True:
        clear_screen()

        provs = self.provider_settings_mgr.get_available_providers()
        back_option = len(provs) + 1
        rule = "━" * 70

        self.console.print(
            Panel.fit(
                "[bold cyan]🔬 Provider-Specific Settings[/bold cyan]",
                border_style="cyan",
            )
        )

        self.console.print()
        self.console.print(
            "[bold]📋 Available Providers with Custom Settings[/bold]"
        )
        self.console.print(rule)

        # One status row per provider: how many settings differ from defaults.
        for prov in provs:
            changed = self.provider_settings_mgr.get_modified_settings(prov)
            if changed:
                status = f"[yellow]{len(changed)} modified[/yellow]"
            else:
                status = "[dim]defaults[/dim]"
            pretty = prov.replace("_", " ").title()
            self.console.print(f" • {pretty:20} {status}")

        self.console.print()
        self.console.print(rule)
        self.console.print()
        self.console.print("[bold]⚙️ Select Provider to Configure[/bold]")
        self.console.print()

        for idx, prov in enumerate(provs, 1):
            pretty = prov.replace("_", " ").title()
            self.console.print(f" {idx}. {pretty}")
        self.console.print(f" {back_option}. ↩️ Back to Settings Menu")

        self.console.print()
        self.console.print(rule)
        self.console.print()

        selection = int(
            Prompt.ask(
                "Select option",
                choices=[str(i) for i in range(1, back_option + 1)],
                show_choices=False,
            )
        )

        if selection == back_option:
            break

        self._manage_single_provider_settings(provs[selection - 1])
|
| 1120 |
+
|
| 1121 |
def _manage_single_provider_settings(self, provider: str):
|
| 1122 |
"""Manage settings for a single provider"""
|
| 1123 |
while True:
|
| 1124 |
clear_screen()
|
| 1125 |
+
|
| 1126 |
display_name = provider.replace("_", " ").title()
|
| 1127 |
+
definitions = self.provider_settings_mgr.get_provider_settings_definitions(
|
| 1128 |
+
provider
|
| 1129 |
+
)
|
| 1130 |
current_values = self.provider_settings_mgr.get_all_current_values(provider)
|
| 1131 |
+
|
| 1132 |
+
self.console.print(
|
| 1133 |
+
Panel.fit(
|
| 1134 |
+
f"[bold cyan]🔬 {display_name} Settings[/bold cyan]",
|
| 1135 |
+
border_style="cyan",
|
| 1136 |
+
)
|
| 1137 |
+
)
|
| 1138 |
+
|
| 1139 |
self.console.print()
|
| 1140 |
self.console.print("[bold]📋 Current Settings[/bold]")
|
| 1141 |
self.console.print("━" * 70)
|
| 1142 |
+
|
| 1143 |
# Display all settings with current values
|
| 1144 |
settings_list = list(definitions.keys())
|
| 1145 |
for idx, key in enumerate(settings_list, 1):
|
|
|
|
| 1148 |
default = definition.get("default")
|
| 1149 |
setting_type = definition.get("type", "str")
|
| 1150 |
description = definition.get("description", "")
|
| 1151 |
+
|
| 1152 |
# Format value display
|
| 1153 |
if setting_type == "bool":
|
| 1154 |
+
value_display = (
|
| 1155 |
+
"[green]✓ Enabled[/green]"
|
| 1156 |
+
if current
|
| 1157 |
+
else "[red]✗ Disabled[/red]"
|
| 1158 |
+
)
|
| 1159 |
elif setting_type == "int":
|
| 1160 |
value_display = f"[cyan]{current}[/cyan]"
|
| 1161 |
else:
|
| 1162 |
+
value_display = (
|
| 1163 |
+
f"[cyan]{current or '(not set)'}[/cyan]"
|
| 1164 |
+
if current
|
| 1165 |
+
else "[dim](not set)[/dim]"
|
| 1166 |
+
)
|
| 1167 |
+
|
| 1168 |
# Check if modified from default
|
| 1169 |
modified = current != default
|
| 1170 |
mod_marker = "[yellow]*[/yellow]" if modified else " "
|
| 1171 |
+
|
| 1172 |
# Short key name for display (strip provider prefix)
|
| 1173 |
short_key = key.replace(f"{provider.upper()}_", "")
|
| 1174 |
+
|
| 1175 |
+
self.console.print(
|
| 1176 |
+
f" {mod_marker}{idx:2}. {short_key:35} {value_display}"
|
| 1177 |
+
)
|
| 1178 |
self.console.print(f" [dim]{description}[/dim]")
|
| 1179 |
+
|
| 1180 |
self.console.print()
|
| 1181 |
self.console.print("━" * 70)
|
| 1182 |
self.console.print("[dim]* = modified from default[/dim]")
|
|
|
|
| 1187 |
self.console.print(" R. 🔄 Reset Setting to Default")
|
| 1188 |
self.console.print(" A. 🔄 Reset All to Defaults")
|
| 1189 |
self.console.print(" B. ↩️ Back to Provider Selection")
|
| 1190 |
+
|
| 1191 |
self.console.print()
|
| 1192 |
self.console.print("━" * 70)
|
| 1193 |
self.console.print()
|
| 1194 |
+
|
| 1195 |
+
choice = Prompt.ask(
|
| 1196 |
+
"Select action",
|
| 1197 |
+
choices=["e", "r", "a", "b", "E", "R", "A", "B"],
|
| 1198 |
+
show_choices=False,
|
| 1199 |
+
).lower()
|
| 1200 |
+
|
| 1201 |
if choice == "b":
|
| 1202 |
break
|
| 1203 |
elif choice == "e":
|
|
|
|
| 1206 |
self._reset_provider_setting(provider, settings_list, definitions)
|
| 1207 |
elif choice == "a":
|
| 1208 |
self._reset_all_provider_settings(provider, settings_list)
|
| 1209 |
+
|
| 1210 |
+
def _edit_provider_setting(
|
| 1211 |
+
self,
|
| 1212 |
+
provider: str,
|
| 1213 |
+
settings_list: List[str],
|
| 1214 |
+
definitions: Dict[str, Dict[str, Any]],
|
| 1215 |
+
):
|
| 1216 |
"""Edit a single provider setting"""
|
| 1217 |
self.console.print("\n[bold]Select setting number to edit:[/bold]")
|
| 1218 |
+
|
| 1219 |
choices = [str(i) for i in range(1, len(settings_list) + 1)]
|
| 1220 |
choice = IntPrompt.ask("Setting number", choices=choices)
|
| 1221 |
key = settings_list[choice - 1]
|
| 1222 |
definition = definitions[key]
|
| 1223 |
+
|
| 1224 |
current = self.provider_settings_mgr.get_current_value(key, definition)
|
| 1225 |
default = definition.get("default")
|
| 1226 |
setting_type = definition.get("type", "str")
|
| 1227 |
short_key = key.replace(f"{provider.upper()}_", "")
|
| 1228 |
+
|
| 1229 |
self.console.print(f"\n[bold]Editing: {short_key}[/bold]")
|
| 1230 |
self.console.print(f"Current value: [cyan]{current}[/cyan]")
|
| 1231 |
self.console.print(f"Default value: [dim]{default}[/dim]")
|
| 1232 |
self.console.print(f"Type: {setting_type}")
|
| 1233 |
+
|
| 1234 |
if setting_type == "bool":
|
| 1235 |
new_value = Confirm.ask("\nEnable this setting?", default=current)
|
| 1236 |
self.provider_settings_mgr.set_value(key, new_value, definition)
|
|
|
|
| 1241 |
self.provider_settings_mgr.set_value(key, new_value, definition)
|
| 1242 |
self.console.print(f"\n[green]✅ {short_key} set to {new_value}![/green]")
|
| 1243 |
else:
|
| 1244 |
+
new_value = Prompt.ask(
|
| 1245 |
+
"\nNew value", default=str(current) if current else ""
|
| 1246 |
+
).strip()
|
| 1247 |
if new_value:
|
| 1248 |
self.provider_settings_mgr.set_value(key, new_value, definition)
|
| 1249 |
self.console.print(f"\n[green]✅ {short_key} updated![/green]")
|
| 1250 |
else:
|
| 1251 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 1252 |
+
|
| 1253 |
input("\nPress Enter to continue...")
|
| 1254 |
+
|
| 1255 |
+
def _reset_provider_setting(
    self,
    provider: str,
    settings_list: List[str],
    definitions: Dict[str, Dict[str, Any]],
):
    """Reset a single provider setting to default"""
    self.console.print("\n[bold]Select setting number to reset:[/bold]")

    valid = [str(n) for n in range(1, len(settings_list) + 1)]
    picked = IntPrompt.ask("Setting number", choices=valid)
    setting_key = settings_list[picked - 1]
    meta = definitions[setting_key]

    default_value = meta.get("default")
    # Display name without the provider prefix (e.g. ANTIGRAVITY_).
    label = setting_key.replace(f"{provider.upper()}_", "")

    if Confirm.ask(f"\nReset {label} to default ({default_value})?"):
        self.provider_settings_mgr.reset_to_default(setting_key)
        self.console.print(f"\n[green]✅ {label} reset to default![/green]")
    else:
        self.console.print("\n[yellow]No changes made[/yellow]")

    input("\nPress Enter to continue...")
|
| 1279 |
+
|
| 1280 |
def _reset_all_provider_settings(self, provider: str, settings_list: List[str]):
    """Reset all provider settings to defaults"""
    title = provider.replace("_", " ").title()

    confirmed = Confirm.ask(
        f"\n[bold red]Reset ALL {title} settings to defaults?[/bold red]"
    )
    if not confirmed:
        self.console.print("\n[yellow]No changes made[/yellow]")
    else:
        for setting_key in settings_list:
            self.provider_settings_mgr.reset_to_default(setting_key)
        self.console.print(
            f"\n[green]✅ All {title} settings reset to defaults![/green]"
        )

    input("\nPress Enter to continue...")
|
| 1296 |
+
|
| 1297 |
+
def manage_rotation_modes(self):
    """Manage credential rotation modes (sequential vs balanced)"""

    def colorize(mode):
        # Shared markup: sequential is green, everything else (balanced) blue.
        return (
            f"[green]{mode}[/green]"
            if mode == "sequential"
            else f"[blue]{mode}[/blue]"
        )

    while True:
        clear_screen()

        overrides = self.rotation_mgr.get_current_modes()
        providers = self.get_available_providers()
        rule = "━" * 70

        self.console.print(
            Panel.fit(
                "[bold cyan]🔄 Credential Rotation Mode Configuration[/bold cyan]",
                border_style="cyan",
            )
        )

        self.console.print()
        self.console.print("[bold]📋 Rotation Modes Explained[/bold]")
        self.console.print(rule)
        self.console.print(
            " [cyan]balanced[/cyan] - Rotate credentials evenly across requests (default)"
        )
        self.console.print(
            " [cyan]sequential[/cyan] - Use one credential until exhausted (429), then switch"
        )
        self.console.print()
        self.console.print("[bold]📋 Current Rotation Mode Settings[/bold]")
        self.console.print(rule)

        for prov, mode in overrides.items():
            marker = (
                "[yellow]*[/yellow]"
                if mode != self.rotation_mgr.get_default_mode(prov)
                else " "
            )
            self.console.print(f" {marker}• {prov:20} {colorize(mode)}")

        # Providers without an explicit override fall back to their default.
        defaulted = [p for p in providers if p not in overrides]
        if defaulted:
            self.console.print()
            self.console.print("[dim]Providers using default modes:[/dim]")
            for prov in defaulted:
                mode = self.rotation_mgr.get_default_mode(prov)
                self.console.print(
                    f" • {prov:20} {colorize(mode)} [dim](default)[/dim]"
                )

        self.console.print()
        self.console.print(rule)
        self.console.print(
            "[dim]* = custom setting (differs from provider default)[/dim]"
        )
        self.console.print()
        self.console.print("[bold]⚙️ Actions[/bold]")
        self.console.print()
        self.console.print(" 1. ➕ Set Rotation Mode for Provider")
        self.console.print(" 2. 🗑️ Reset to Provider Default")
        self.console.print(" 3. ⚡ Configure Priority Concurrency Multipliers")
        self.console.print(" 4. ↩️ Back to Settings Menu")

        self.console.print()
        self.console.print(rule)
        self.console.print()

        action = Prompt.ask(
            "Select option", choices=["1", "2", "3", "4"], show_choices=False
        )

        if action == "4":
            break

        if action == "3":
            self.manage_priority_multipliers()

        elif action == "1":
            if not providers:
                self.console.print(
                    "\n[yellow]No providers with credentials found. Please add credentials first.[/yellow]"
                )
                input("\nPress Enter to continue...")
                continue

            # Show provider selection menu
            self.console.print("\n[bold]Select provider:[/bold]")
            for idx, prov in enumerate(providers, 1):
                eff = self.rotation_mgr.get_effective_mode(prov)
                self.console.print(f" {idx}. {prov} ({colorize(eff)})")
            custom_option = len(providers) + 1
            self.console.print(f" {custom_option}. Enter custom provider name")

            picked = IntPrompt.ask(
                "Select option",
                choices=[str(i) for i in range(1, custom_option + 1)],
            )

            if picked == custom_option:
                target = Prompt.ask("Provider name").strip().lower()
            else:
                target = providers[picked - 1]

            if target:
                eff = self.rotation_mgr.get_effective_mode(target)
                self.console.print(
                    f"\nCurrent mode for {target}: [cyan]{eff}[/cyan]"
                )
                self.console.print("\nSelect new rotation mode:")
                self.console.print(
                    " 1. [blue]balanced[/blue] - Rotate credentials evenly"
                )
                self.console.print(
                    " 2. [green]sequential[/green] - Use until exhausted"
                )

                picked_mode = Prompt.ask(
                    "Select mode", choices=["1", "2"], show_choices=False
                )
                new_mode = "balanced" if picked_mode == "1" else "sequential"

                self.rotation_mgr.set_mode(target, new_mode)
                self.console.print(
                    f"\n[green]✅ Rotation mode for '{target}' set to {new_mode}![/green]"
                )
            input("\nPress Enter to continue...")

        elif action == "2":
            if not overrides:
                self.console.print(
                    "\n[yellow]No custom rotation modes to reset[/yellow]"
                )
                input("\nPress Enter to continue...")
                continue

            # Show numbered list
            self.console.print(
                "\n[bold]Select provider to reset to default:[/bold]"
            )
            override_names = list(overrides.keys())
            for idx, prov in enumerate(override_names, 1):
                fallback = self.rotation_mgr.get_default_mode(prov)
                self.console.print(
                    f" {idx}. {prov} (will reset to: {fallback})"
                )

            picked = IntPrompt.ask(
                "Select option",
                choices=[str(i) for i in range(1, len(override_names) + 1)],
            )
            target = override_names[picked - 1]
            fallback = self.rotation_mgr.get_default_mode(target)

            if Confirm.ask(f"Reset '{target}' to default mode ({fallback})?"):
                self.rotation_mgr.remove_mode(target)
                self.console.print(
                    f"\n[green]✅ Rotation mode for '{target}' reset to default ({fallback})![/green]"
                )
            input("\nPress Enter to continue...")
|
| 1468 |
+
|
| 1469 |
+
def manage_priority_multipliers(self):
    """Interactive settings screen for priority-based concurrency multipliers.

    Shows, for every available provider, the effective multiplier of each
    priority tier — provider-class defaults vs. user overrides from
    self.priority_multiplier_mgr — plus the sequential-mode fallback
    multiplier, then offers a menu to set or reset a per-priority override.
    Blocks on console input; returns to the caller's menu when done.
    """
    clear_screen()

    current_multipliers = self.priority_multiplier_mgr.get_current_multipliers()
    available_providers = self.get_available_providers()

    self.console.print(
        Panel.fit(
            "[bold cyan]⚡ Priority Concurrency Multipliers[/bold cyan]",
            border_style="cyan",
        )
    )

    self.console.print()
    self.console.print("[bold]📋 Current Priority Multiplier Settings[/bold]")
    self.console.print("━" * 70)

    # Show all providers with their priority multipliers.
    # Only providers that deviate from the implicit 1x default are listed,
    # so the screen stays compact when nothing is configured.
    has_settings = False
    for provider in available_providers:
        defaults = self.priority_multiplier_mgr.get_provider_defaults(provider)
        overrides = current_multipliers.get(provider, {})
        seq_fallback = self.priority_multiplier_mgr.get_sequential_fallback(
            provider
        )
        rotation_mode = self.rotation_mgr.get_effective_mode(provider)

        if defaults or overrides or seq_fallback != 1:
            has_settings = True
            self.console.print(
                f"\n [bold]{provider}[/bold] ({rotation_mode} mode)"
            )

            # Combine and display priorities: union of default tiers and
            # override tiers, with overrides taking display precedence.
            all_priorities = set(defaults.keys()) | set(overrides.keys())
            for priority in sorted(all_priorities):
                default_val = defaults.get(priority, 1)
                override_val = overrides.get(priority)

                if override_val is not None:
                    self.console.print(
                        f" Priority {priority}: [cyan]{override_val}x[/cyan] (override, default: {default_val}x)"
                    )
                else:
                    self.console.print(
                        f" Priority {priority}: {default_val}x [dim](default)[/dim]"
                    )

            # Show sequential fallback if applicable (only meaningful when
            # the provider actually rotates sequentially).
            if rotation_mode == "sequential" and seq_fallback != 1:
                self.console.print(
                    f" Others (seq): {seq_fallback}x [dim](fallback)[/dim]"
                )

    if not has_settings:
        self.console.print(" [dim]No priority multipliers configured[/dim]")

    self.console.print()
    self.console.print("[bold]ℹ️ About Priority Multipliers:[/bold]")
    self.console.print(
        " Higher priority tiers (lower numbers) can have higher multipliers."
    )
    self.console.print(" Example: Priority 1 = 5x, Priority 2 = 3x, Others = 1x")
    self.console.print()
    self.console.print("━" * 70)
    self.console.print()
    self.console.print(" 1. ✏️ Set Priority Multiplier")
    self.console.print(" 2. 🔄 Reset to Provider Default")
    self.console.print(" 3. ↩️ Back")

    choice = Prompt.ask(
        "Select option", choices=["1", "2", "3"], show_choices=False
    )

    if choice == "1":
        if not available_providers:
            self.console.print("\n[yellow]No providers available[/yellow]")
            input("\nPress Enter to continue...")
            return

        # Select provider
        self.console.print("\n[bold]Select provider:[/bold]")
        for idx, prov in enumerate(available_providers, 1):
            self.console.print(f" {idx}. {prov}")

        prov_idx = IntPrompt.ask(
            "Provider",
            choices=[str(i) for i in range(1, len(available_providers) + 1)],
        )
        provider = available_providers[prov_idx - 1]

        # Get priority level (free-form int; not restricted to known tiers
        # so new tiers can be configured ahead of credentials using them)
        priority = IntPrompt.ask("Priority level (e.g., 1, 2, 3)")

        # Get current value to show as the prompt default below
        current = self.priority_multiplier_mgr.get_effective_multiplier(
            provider, priority
        )
        self.console.print(
            f"\nCurrent multiplier for priority {priority}: {current}x"
        )

        multiplier = IntPrompt.ask("New multiplier (1-10)", default=current)
        if 1 <= multiplier <= 10:
            self.priority_multiplier_mgr.set_multiplier(
                provider, priority, multiplier
            )
            self.console.print(
                f"\n[green]✅ Priority {priority} multiplier for '{provider}' set to {multiplier}x[/green]"
            )
        else:
            self.console.print(
                "\n[yellow]Multiplier must be between 1 and 10[/yellow]"
            )
        input("\nPress Enter to continue...")

    elif choice == "2":
        # Find providers with overrides (only those can be reset)
        providers_with_overrides = [
            p for p in available_providers if p in current_multipliers
        ]
        if not providers_with_overrides:
            self.console.print("\n[yellow]No custom multipliers to reset[/yellow]")
            input("\nPress Enter to continue...")
            return

        self.console.print("\n[bold]Select provider to reset:[/bold]")
        for idx, prov in enumerate(providers_with_overrides, 1):
            self.console.print(f" {idx}. {prov}")

        prov_idx = IntPrompt.ask(
            "Provider",
            choices=[str(i) for i in range(1, len(providers_with_overrides) + 1)],
        )
        provider = providers_with_overrides[prov_idx - 1]

        # Get priority to reset; when exactly one override exists there is
        # nothing to choose, so skip the prompt.
        overrides = current_multipliers.get(provider, {})
        if len(overrides) == 1:
            priority = list(overrides.keys())[0]
        else:
            self.console.print(f"\nOverrides for {provider}: {overrides}")
            priority = IntPrompt.ask("Priority level to reset")

        if priority in overrides:
            self.priority_multiplier_mgr.remove_multiplier(provider, priority)
            # Re-query so the confirmation shows the value now in effect
            default = self.priority_multiplier_mgr.get_effective_multiplier(
                provider, priority
            )
            self.console.print(
                f"\n[green]✅ Reset priority {priority} for '{provider}' to default ({default}x)[/green]"
            )
        else:
            self.console.print(
                f"\n[yellow]No override for priority {priority}[/yellow]"
            )
        input("\nPress Enter to continue...")
| 1628 |
def manage_concurrency_limits(self):
|
| 1629 |
"""Manage concurrency limits"""
|
| 1630 |
while True:
|
| 1631 |
clear_screen()
|
| 1632 |
+
|
| 1633 |
limits = self.concurrency_mgr.get_current_limits()
|
| 1634 |
+
|
| 1635 |
+
self.console.print(
|
| 1636 |
+
Panel.fit(
|
| 1637 |
+
"[bold cyan]⚡ Concurrency Limits Configuration[/bold cyan]",
|
| 1638 |
+
border_style="cyan",
|
| 1639 |
+
)
|
| 1640 |
+
)
|
| 1641 |
+
|
| 1642 |
self.console.print()
|
| 1643 |
self.console.print("[bold]📋 Current Concurrency Settings[/bold]")
|
| 1644 |
self.console.print("━" * 70)
|
| 1645 |
+
|
| 1646 |
if limits:
|
| 1647 |
for provider, limit in limits.items():
|
| 1648 |
self.console.print(f" • {provider:15} {limit} requests/key")
|
| 1649 |
self.console.print(f" • Default: 1 request/key (all others)")
|
| 1650 |
else:
|
| 1651 |
self.console.print(" • Default: 1 request/key (all providers)")
|
| 1652 |
+
|
| 1653 |
self.console.print()
|
| 1654 |
self.console.print("━" * 70)
|
| 1655 |
self.console.print()
|
|
|
|
| 1659 |
self.console.print(" 2. ✏️ Edit Existing Limit")
|
| 1660 |
self.console.print(" 3. 🗑️ Remove Limit (reset to default)")
|
| 1661 |
self.console.print(" 4. ↩️ Back to Settings Menu")
|
| 1662 |
+
|
| 1663 |
self.console.print()
|
| 1664 |
self.console.print("━" * 70)
|
| 1665 |
self.console.print()
|
| 1666 |
+
|
| 1667 |
+
choice = Prompt.ask(
|
| 1668 |
+
"Select option", choices=["1", "2", "3", "4"], show_choices=False
|
| 1669 |
+
)
|
| 1670 |
+
|
| 1671 |
if choice == "1":
|
| 1672 |
# Get available providers
|
| 1673 |
available_providers = self.get_available_providers()
|
| 1674 |
+
|
| 1675 |
if not available_providers:
|
| 1676 |
+
self.console.print(
|
| 1677 |
+
"\n[yellow]No providers with credentials found. Please add credentials first.[/yellow]"
|
| 1678 |
+
)
|
| 1679 |
input("\nPress Enter to continue...")
|
| 1680 |
continue
|
| 1681 |
+
|
| 1682 |
# Show provider selection menu
|
| 1683 |
self.console.print("\n[bold]Select provider:[/bold]")
|
| 1684 |
for idx, prov in enumerate(available_providers, 1):
|
| 1685 |
self.console.print(f" {idx}. {prov}")
|
| 1686 |
+
self.console.print(
|
| 1687 |
+
f" {len(available_providers) + 1}. Enter custom provider name"
|
| 1688 |
+
)
|
| 1689 |
+
|
| 1690 |
+
choice_idx = IntPrompt.ask(
|
| 1691 |
+
"Select option",
|
| 1692 |
+
choices=[str(i) for i in range(1, len(available_providers) + 2)],
|
| 1693 |
+
)
|
| 1694 |
+
|
| 1695 |
if choice_idx == len(available_providers) + 1:
|
| 1696 |
provider = Prompt.ask("Provider name").strip().lower()
|
| 1697 |
else:
|
| 1698 |
provider = available_providers[choice_idx - 1]
|
| 1699 |
+
|
| 1700 |
if provider:
|
| 1701 |
+
limit = IntPrompt.ask(
|
| 1702 |
+
"Max concurrent requests per key (1-100)", default=1
|
| 1703 |
+
)
|
| 1704 |
if 1 <= limit <= 100:
|
| 1705 |
self.concurrency_mgr.set_limit(provider, limit)
|
| 1706 |
+
self.console.print(
|
| 1707 |
+
f"\n[green]✅ Concurrency limit set for '{provider}': {limit} requests/key[/green]"
|
| 1708 |
+
)
|
| 1709 |
else:
|
| 1710 |
+
self.console.print(
|
| 1711 |
+
"\n[red]❌ Limit must be between 1-100[/red]"
|
| 1712 |
+
)
|
| 1713 |
input("\nPress Enter to continue...")
|
| 1714 |
+
|
| 1715 |
elif choice == "2":
|
| 1716 |
if not limits:
|
| 1717 |
self.console.print("\n[yellow]No limits to edit[/yellow]")
|
| 1718 |
input("\nPress Enter to continue...")
|
| 1719 |
continue
|
| 1720 |
+
|
| 1721 |
# Show numbered list
|
| 1722 |
self.console.print("\n[bold]Select provider to edit:[/bold]")
|
| 1723 |
limits_list = list(limits.keys())
|
| 1724 |
for idx, prov in enumerate(limits_list, 1):
|
| 1725 |
self.console.print(f" {idx}. {prov}")
|
| 1726 |
+
|
| 1727 |
+
choice_idx = IntPrompt.ask(
|
| 1728 |
+
"Select option",
|
| 1729 |
+
choices=[str(i) for i in range(1, len(limits_list) + 1)],
|
| 1730 |
+
)
|
| 1731 |
provider = limits_list[choice_idx - 1]
|
| 1732 |
current_limit = limits.get(provider, 1)
|
| 1733 |
+
|
| 1734 |
self.console.print(f"\nCurrent limit: {current_limit} requests/key")
|
| 1735 |
+
new_limit = IntPrompt.ask(
|
| 1736 |
+
"New limit (1-100) [press Enter to keep current]",
|
| 1737 |
+
default=current_limit,
|
| 1738 |
+
)
|
| 1739 |
+
|
| 1740 |
if 1 <= new_limit <= 100:
|
| 1741 |
if new_limit != current_limit:
|
| 1742 |
self.concurrency_mgr.set_limit(provider, new_limit)
|
| 1743 |
+
self.console.print(
|
| 1744 |
+
f"\n[green]✅ Concurrency limit updated for '{provider}': {new_limit} requests/key[/green]"
|
| 1745 |
+
)
|
| 1746 |
else:
|
| 1747 |
self.console.print("\n[yellow]No changes made[/yellow]")
|
| 1748 |
else:
|
| 1749 |
self.console.print("\n[red]Limit must be between 1-100[/red]")
|
| 1750 |
input("\nPress Enter to continue...")
|
| 1751 |
+
|
| 1752 |
elif choice == "3":
|
| 1753 |
if not limits:
|
| 1754 |
self.console.print("\n[yellow]No limits to remove[/yellow]")
|
| 1755 |
input("\nPress Enter to continue...")
|
| 1756 |
continue
|
| 1757 |
+
|
| 1758 |
# Show numbered list
|
| 1759 |
+
self.console.print(
|
| 1760 |
+
"\n[bold]Select provider to remove limit from:[/bold]"
|
| 1761 |
+
)
|
| 1762 |
limits_list = list(limits.keys())
|
| 1763 |
for idx, prov in enumerate(limits_list, 1):
|
| 1764 |
self.console.print(f" {idx}. {prov}")
|
| 1765 |
+
|
| 1766 |
+
choice_idx = IntPrompt.ask(
|
| 1767 |
+
"Select option",
|
| 1768 |
+
choices=[str(i) for i in range(1, len(limits_list) + 1)],
|
| 1769 |
+
)
|
| 1770 |
provider = limits_list[choice_idx - 1]
|
| 1771 |
+
|
| 1772 |
+
if Confirm.ask(
|
| 1773 |
+
f"Remove concurrency limit for '{provider}' (reset to default 1)?"
|
| 1774 |
+
):
|
| 1775 |
self.concurrency_mgr.remove_limit(provider)
|
| 1776 |
+
self.console.print(
|
| 1777 |
+
f"\n[green]✅ Limit removed for '{provider}' - using default (1 request/key)[/green]"
|
| 1778 |
+
)
|
| 1779 |
input("\nPress Enter to continue...")
|
| 1780 |
+
|
| 1781 |
elif choice == "4":
|
| 1782 |
break
|
| 1783 |
+
|
| 1784 |
def save_and_exit(self):
|
| 1785 |
"""Save pending changes and exit"""
|
| 1786 |
if self.settings.has_pending():
|
|
|
|
| 1795 |
else:
|
| 1796 |
self.console.print("\n[dim]No changes to save[/dim]")
|
| 1797 |
input("\nPress Enter to return to launcher...")
|
| 1798 |
+
|
| 1799 |
self.running = False
|
| 1800 |
+
|
| 1801 |
def exit_without_saving(self):
|
| 1802 |
"""Exit without saving"""
|
| 1803 |
if self.settings.has_pending():
|
src/rotator_library/client.py
CHANGED
|
@@ -139,12 +139,119 @@ class RotatingClient:
|
|
| 139 |
self.max_retries = max_retries
|
| 140 |
self.global_timeout = global_timeout
|
| 141 |
self.abort_on_callback_error = abort_on_callback_error
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
self.usage_manager = UsageManager(
|
| 143 |
-
file_path=usage_file_path,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
)
|
| 145 |
self._model_list_cache = {}
|
| 146 |
-
self._provider_plugins = PROVIDER_PLUGINS
|
| 147 |
-
self._provider_instances = {}
|
| 148 |
self.http_client = httpx.AsyncClient()
|
| 149 |
self.all_providers = AllProviders()
|
| 150 |
self.cooldown_manager = CooldownManager()
|
|
@@ -958,19 +1065,185 @@ class RotatingClient:
|
|
| 958 |
is_budget_enabled
|
| 959 |
)
|
| 960 |
|
| 961 |
-
#
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
|
|
|
| 966 |
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
|
| 970 |
-
|
| 971 |
-
|
| 972 |
-
|
| 973 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 974 |
|
| 975 |
else: # This is the standard API Key / litellm-handled provider logic
|
| 976 |
is_oauth = provider in self.oauth_providers
|
|
@@ -1070,7 +1343,7 @@ class RotatingClient:
|
|
| 1070 |
if request
|
| 1071 |
else {},
|
| 1072 |
)
|
| 1073 |
-
classified_error = classify_error(e)
|
| 1074 |
|
| 1075 |
# Extract a clean error message for the user-facing log
|
| 1076 |
error_message = str(e).split("\n")[0]
|
|
@@ -1114,7 +1387,7 @@ class RotatingClient:
|
|
| 1114 |
if request
|
| 1115 |
else {},
|
| 1116 |
)
|
| 1117 |
-
classified_error = classify_error(e)
|
| 1118 |
error_message = str(e).split("\n")[0]
|
| 1119 |
|
| 1120 |
# Provider-level error: don't increment consecutive failures
|
|
@@ -1170,7 +1443,7 @@ class RotatingClient:
|
|
| 1170 |
else {},
|
| 1171 |
)
|
| 1172 |
|
| 1173 |
-
classified_error = classify_error(e)
|
| 1174 |
error_message = str(e).split("\n")[0]
|
| 1175 |
|
| 1176 |
lib_logger.warning(
|
|
@@ -1239,7 +1512,7 @@ class RotatingClient:
|
|
| 1239 |
)
|
| 1240 |
raise last_exception
|
| 1241 |
|
| 1242 |
-
classified_error = classify_error(e)
|
| 1243 |
error_message = str(e).split("\n")[0]
|
| 1244 |
|
| 1245 |
lib_logger.warning(
|
|
@@ -1566,7 +1839,9 @@ class RotatingClient:
|
|
| 1566 |
last_exception = e
|
| 1567 |
# If the exception is our custom wrapper, unwrap the original error
|
| 1568 |
original_exc = getattr(e, "data", e)
|
| 1569 |
-
classified_error = classify_error(
|
|
|
|
|
|
|
| 1570 |
error_message = str(original_exc).split("\n")[0]
|
| 1571 |
|
| 1572 |
log_failure(
|
|
@@ -1623,7 +1898,7 @@ class RotatingClient:
|
|
| 1623 |
if request
|
| 1624 |
else {},
|
| 1625 |
)
|
| 1626 |
-
classified_error = classify_error(e)
|
| 1627 |
error_message = str(e).split("\n")[0]
|
| 1628 |
|
| 1629 |
# Provider-level error: don't increment consecutive failures
|
|
@@ -1673,7 +1948,7 @@ class RotatingClient:
|
|
| 1673 |
if request
|
| 1674 |
else {},
|
| 1675 |
)
|
| 1676 |
-
classified_error = classify_error(e)
|
| 1677 |
error_message = str(e).split("\n")[0]
|
| 1678 |
|
| 1679 |
# Record in accumulator
|
|
@@ -1812,7 +2087,9 @@ class RotatingClient:
|
|
| 1812 |
cleaned_str = None
|
| 1813 |
# The actual exception might be wrapped in our StreamedAPIError.
|
| 1814 |
original_exc = getattr(e, "data", e)
|
| 1815 |
-
classified_error = classify_error(
|
|
|
|
|
|
|
| 1816 |
|
| 1817 |
# Check if this error should trigger rotation
|
| 1818 |
if not should_rotate_on_error(classified_error):
|
|
@@ -1939,7 +2216,7 @@ class RotatingClient:
|
|
| 1939 |
if request
|
| 1940 |
else {},
|
| 1941 |
)
|
| 1942 |
-
classified_error = classify_error(e)
|
| 1943 |
error_message_text = str(e).split("\n")[0]
|
| 1944 |
|
| 1945 |
# Record error in accumulator (server errors are transient, not abnormal)
|
|
@@ -1990,7 +2267,7 @@ class RotatingClient:
|
|
| 1990 |
if request
|
| 1991 |
else {},
|
| 1992 |
)
|
| 1993 |
-
classified_error = classify_error(e)
|
| 1994 |
error_message_text = str(e).split("\n")[0]
|
| 1995 |
|
| 1996 |
# Record error in accumulator
|
|
@@ -2232,7 +2509,7 @@ class RotatingClient:
|
|
| 2232 |
self._model_list_cache[provider] = final_models
|
| 2233 |
return final_models
|
| 2234 |
except Exception as e:
|
| 2235 |
-
classified_error = classify_error(e)
|
| 2236 |
cred_display = mask_credential(credential)
|
| 2237 |
lib_logger.debug(
|
| 2238 |
f"Failed to get models for provider {provider} with credential {cred_display}: {classified_error.error_type}. Trying next credential."
|
|
|
|
| 139 |
self.max_retries = max_retries
|
| 140 |
self.global_timeout = global_timeout
|
| 141 |
self.abort_on_callback_error = abort_on_callback_error
|
| 142 |
+
|
| 143 |
+
# Initialize provider plugins early so they can be used for rotation mode detection
|
| 144 |
+
self._provider_plugins = PROVIDER_PLUGINS
|
| 145 |
+
self._provider_instances = {}
|
| 146 |
+
|
| 147 |
+
# Build provider rotation modes map
|
| 148 |
+
# Each provider can specify its preferred rotation mode ("balanced" or "sequential")
|
| 149 |
+
provider_rotation_modes = {}
|
| 150 |
+
for provider in self.all_credentials.keys():
|
| 151 |
+
provider_class = self._provider_plugins.get(provider)
|
| 152 |
+
if provider_class and hasattr(provider_class, "get_rotation_mode"):
|
| 153 |
+
# Use class method to get rotation mode (checks env var + class default)
|
| 154 |
+
mode = provider_class.get_rotation_mode(provider)
|
| 155 |
+
else:
|
| 156 |
+
# Fallback: check environment variable directly
|
| 157 |
+
env_key = f"ROTATION_MODE_{provider.upper()}"
|
| 158 |
+
mode = os.getenv(env_key, "balanced")
|
| 159 |
+
|
| 160 |
+
provider_rotation_modes[provider] = mode
|
| 161 |
+
if mode != "balanced":
|
| 162 |
+
lib_logger.info(f"Provider '{provider}' using rotation mode: {mode}")
|
| 163 |
+
|
| 164 |
+
# Build priority-based concurrency multiplier maps
|
| 165 |
+
# These are universal multipliers based on credential tier/priority
|
| 166 |
+
priority_multipliers: Dict[str, Dict[int, int]] = {}
|
| 167 |
+
priority_multipliers_by_mode: Dict[str, Dict[str, Dict[int, int]]] = {}
|
| 168 |
+
sequential_fallback_multipliers: Dict[str, int] = {}
|
| 169 |
+
|
| 170 |
+
for provider in self.all_credentials.keys():
|
| 171 |
+
provider_class = self._provider_plugins.get(provider)
|
| 172 |
+
|
| 173 |
+
# Start with provider class defaults
|
| 174 |
+
if provider_class:
|
| 175 |
+
# Get default priority multipliers from provider class
|
| 176 |
+
if hasattr(provider_class, "default_priority_multipliers"):
|
| 177 |
+
default_multipliers = provider_class.default_priority_multipliers
|
| 178 |
+
if default_multipliers:
|
| 179 |
+
priority_multipliers[provider] = dict(default_multipliers)
|
| 180 |
+
|
| 181 |
+
# Get sequential fallback from provider class
|
| 182 |
+
if hasattr(provider_class, "default_sequential_fallback_multiplier"):
|
| 183 |
+
fallback = provider_class.default_sequential_fallback_multiplier
|
| 184 |
+
if fallback != 1: # Only store if different from global default
|
| 185 |
+
sequential_fallback_multipliers[provider] = fallback
|
| 186 |
+
|
| 187 |
+
# Override with environment variables
|
| 188 |
+
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
|
| 189 |
+
# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
|
| 190 |
+
for key, value in os.environ.items():
|
| 191 |
+
prefix = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_"
|
| 192 |
+
if key.startswith(prefix):
|
| 193 |
+
remainder = key[len(prefix) :]
|
| 194 |
+
try:
|
| 195 |
+
multiplier = int(value)
|
| 196 |
+
if multiplier < 1:
|
| 197 |
+
lib_logger.warning(f"Invalid {key}: {value}. Must be >= 1.")
|
| 198 |
+
continue
|
| 199 |
+
|
| 200 |
+
# Check if mode-specific (e.g., _PRIORITY_1_SEQUENTIAL)
|
| 201 |
+
if "_" in remainder:
|
| 202 |
+
parts = remainder.rsplit("_", 1)
|
| 203 |
+
priority = int(parts[0])
|
| 204 |
+
mode = parts[1].lower()
|
| 205 |
+
if mode in ("sequential", "balanced"):
|
| 206 |
+
# Mode-specific override
|
| 207 |
+
if provider not in priority_multipliers_by_mode:
|
| 208 |
+
priority_multipliers_by_mode[provider] = {}
|
| 209 |
+
if mode not in priority_multipliers_by_mode[provider]:
|
| 210 |
+
priority_multipliers_by_mode[provider][mode] = {}
|
| 211 |
+
priority_multipliers_by_mode[provider][mode][
|
| 212 |
+
priority
|
| 213 |
+
] = multiplier
|
| 214 |
+
lib_logger.info(
|
| 215 |
+
f"Provider '{provider}' priority {priority} ({mode} mode) multiplier: {multiplier}x"
|
| 216 |
+
)
|
| 217 |
+
else:
|
| 218 |
+
# Assume it's part of the priority number (unlikely but handle gracefully)
|
| 219 |
+
lib_logger.warning(f"Unknown mode in {key}: {mode}")
|
| 220 |
+
else:
|
| 221 |
+
# Universal priority multiplier
|
| 222 |
+
priority = int(remainder)
|
| 223 |
+
if provider not in priority_multipliers:
|
| 224 |
+
priority_multipliers[provider] = {}
|
| 225 |
+
priority_multipliers[provider][priority] = multiplier
|
| 226 |
+
lib_logger.info(
|
| 227 |
+
f"Provider '{provider}' priority {priority} multiplier: {multiplier}x"
|
| 228 |
+
)
|
| 229 |
+
except ValueError:
|
| 230 |
+
lib_logger.warning(
|
| 231 |
+
f"Invalid {key}: {value}. Could not parse priority or multiplier."
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
+
# Log configured multipliers
|
| 235 |
+
for provider, multipliers in priority_multipliers.items():
|
| 236 |
+
if multipliers:
|
| 237 |
+
lib_logger.info(
|
| 238 |
+
f"Provider '{provider}' priority multipliers: {multipliers}"
|
| 239 |
+
)
|
| 240 |
+
for provider, fallback in sequential_fallback_multipliers.items():
|
| 241 |
+
lib_logger.info(
|
| 242 |
+
f"Provider '{provider}' sequential fallback multiplier: {fallback}x"
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
self.usage_manager = UsageManager(
|
| 246 |
+
file_path=usage_file_path,
|
| 247 |
+
rotation_tolerance=rotation_tolerance,
|
| 248 |
+
provider_rotation_modes=provider_rotation_modes,
|
| 249 |
+
provider_plugins=PROVIDER_PLUGINS,
|
| 250 |
+
priority_multipliers=priority_multipliers,
|
| 251 |
+
priority_multipliers_by_mode=priority_multipliers_by_mode,
|
| 252 |
+
sequential_fallback_multipliers=sequential_fallback_multipliers,
|
| 253 |
)
|
| 254 |
self._model_list_cache = {}
|
|
|
|
|
|
|
| 255 |
self.http_client = httpx.AsyncClient()
|
| 256 |
self.all_providers = AllProviders()
|
| 257 |
self.cooldown_manager = CooldownManager()
|
|
|
|
| 1065 |
is_budget_enabled
|
| 1066 |
)
|
| 1067 |
|
| 1068 |
+
# Retry loop for custom providers - mirrors streaming path error handling
|
| 1069 |
+
for attempt in range(self.max_retries):
|
| 1070 |
+
try:
|
| 1071 |
+
lib_logger.info(
|
| 1072 |
+
f"Attempting call with credential {mask_credential(current_cred)} (Attempt {attempt + 1}/{self.max_retries})"
|
| 1073 |
+
)
|
| 1074 |
|
| 1075 |
+
if pre_request_callback:
|
| 1076 |
+
try:
|
| 1077 |
+
await pre_request_callback(request, litellm_kwargs)
|
| 1078 |
+
except Exception as e:
|
| 1079 |
+
if self.abort_on_callback_error:
|
| 1080 |
+
raise PreRequestCallbackError(
|
| 1081 |
+
f"Pre-request callback failed: {e}"
|
| 1082 |
+
) from e
|
| 1083 |
+
else:
|
| 1084 |
+
lib_logger.warning(
|
| 1085 |
+
f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}"
|
| 1086 |
+
)
|
| 1087 |
+
|
| 1088 |
+
response = await provider_plugin.acompletion(
|
| 1089 |
+
self.http_client, **litellm_kwargs
|
| 1090 |
+
)
|
| 1091 |
+
|
| 1092 |
+
# For non-streaming, success is immediate
|
| 1093 |
+
await self.usage_manager.record_success(
|
| 1094 |
+
current_cred, model, response
|
| 1095 |
+
)
|
| 1096 |
+
await self.usage_manager.release_key(current_cred, model)
|
| 1097 |
+
key_acquired = False
|
| 1098 |
+
return response
|
| 1099 |
+
|
| 1100 |
+
except (
|
| 1101 |
+
litellm.RateLimitError,
|
| 1102 |
+
httpx.HTTPStatusError,
|
| 1103 |
+
) as e:
|
| 1104 |
+
last_exception = e
|
| 1105 |
+
classified_error = classify_error(e, provider=provider)
|
| 1106 |
+
error_message = str(e).split("\n")[0]
|
| 1107 |
+
|
| 1108 |
+
log_failure(
|
| 1109 |
+
api_key=current_cred,
|
| 1110 |
+
model=model,
|
| 1111 |
+
attempt=attempt + 1,
|
| 1112 |
+
error=e,
|
| 1113 |
+
request_headers=dict(request.headers)
|
| 1114 |
+
if request
|
| 1115 |
+
else {},
|
| 1116 |
+
)
|
| 1117 |
+
|
| 1118 |
+
# Record in accumulator for client reporting
|
| 1119 |
+
error_accumulator.record_error(
|
| 1120 |
+
current_cred, classified_error, error_message
|
| 1121 |
+
)
|
| 1122 |
+
|
| 1123 |
+
# Check if this error should trigger rotation
|
| 1124 |
+
if not should_rotate_on_error(classified_error):
|
| 1125 |
+
lib_logger.error(
|
| 1126 |
+
f"Non-recoverable error ({classified_error.error_type}) during custom provider call. Failing."
|
| 1127 |
+
)
|
| 1128 |
+
raise last_exception
|
| 1129 |
+
|
| 1130 |
+
# Handle rate limits with cooldown (exclude quota_exceeded)
|
| 1131 |
+
if classified_error.error_type == "rate_limit":
|
| 1132 |
+
cooldown_duration = classified_error.retry_after or 60
|
| 1133 |
+
await self.cooldown_manager.start_cooldown(
|
| 1134 |
+
provider, cooldown_duration
|
| 1135 |
+
)
|
| 1136 |
+
|
| 1137 |
+
await self.usage_manager.record_failure(
|
| 1138 |
+
current_cred, model, classified_error
|
| 1139 |
+
)
|
| 1140 |
+
lib_logger.warning(
|
| 1141 |
+
f"Cred {mask_credential(current_cred)} {classified_error.error_type} (HTTP {classified_error.status_code}). Rotating."
|
| 1142 |
+
)
|
| 1143 |
+
break # Rotate to next credential
|
| 1144 |
+
|
| 1145 |
+
except (
|
| 1146 |
+
APIConnectionError,
|
| 1147 |
+
litellm.InternalServerError,
|
| 1148 |
+
litellm.ServiceUnavailableError,
|
| 1149 |
+
) as e:
|
| 1150 |
+
last_exception = e
|
| 1151 |
+
log_failure(
|
| 1152 |
+
api_key=current_cred,
|
| 1153 |
+
model=model,
|
| 1154 |
+
attempt=attempt + 1,
|
| 1155 |
+
error=e,
|
| 1156 |
+
request_headers=dict(request.headers)
|
| 1157 |
+
if request
|
| 1158 |
+
else {},
|
| 1159 |
+
)
|
| 1160 |
+
classified_error = classify_error(e, provider=provider)
|
| 1161 |
+
error_message = str(e).split("\n")[0]
|
| 1162 |
+
|
| 1163 |
+
# Provider-level error: don't increment consecutive failures
|
| 1164 |
+
await self.usage_manager.record_failure(
|
| 1165 |
+
current_cred,
|
| 1166 |
+
model,
|
| 1167 |
+
classified_error,
|
| 1168 |
+
increment_consecutive_failures=False,
|
| 1169 |
+
)
|
| 1170 |
+
|
| 1171 |
+
if attempt >= self.max_retries - 1:
|
| 1172 |
+
error_accumulator.record_error(
|
| 1173 |
+
current_cred, classified_error, error_message
|
| 1174 |
+
)
|
| 1175 |
+
lib_logger.warning(
|
| 1176 |
+
f"Cred {mask_credential(current_cred)} failed after max retries. Rotating."
|
| 1177 |
+
)
|
| 1178 |
+
break
|
| 1179 |
+
|
| 1180 |
+
wait_time = classified_error.retry_after or (
|
| 1181 |
+
2**attempt
|
| 1182 |
+
) + random.uniform(0, 1)
|
| 1183 |
+
remaining_budget = deadline - time.time()
|
| 1184 |
+
if wait_time > remaining_budget:
|
| 1185 |
+
error_accumulator.record_error(
|
| 1186 |
+
current_cred, classified_error, error_message
|
| 1187 |
+
)
|
| 1188 |
+
lib_logger.warning(
|
| 1189 |
+
f"Retry wait ({wait_time:.2f}s) exceeds budget. Rotating."
|
| 1190 |
+
)
|
| 1191 |
+
break
|
| 1192 |
+
|
| 1193 |
+
lib_logger.warning(
|
| 1194 |
+
f"Cred {mask_credential(current_cred)} server error. Retrying in {wait_time:.2f}s."
|
| 1195 |
+
)
|
| 1196 |
+
await asyncio.sleep(wait_time)
|
| 1197 |
+
continue
|
| 1198 |
+
|
| 1199 |
+
except Exception as e:
|
| 1200 |
+
last_exception = e
|
| 1201 |
+
log_failure(
|
| 1202 |
+
api_key=current_cred,
|
| 1203 |
+
model=model,
|
| 1204 |
+
attempt=attempt + 1,
|
| 1205 |
+
error=e,
|
| 1206 |
+
request_headers=dict(request.headers)
|
| 1207 |
+
if request
|
| 1208 |
+
else {},
|
| 1209 |
+
)
|
| 1210 |
+
classified_error = classify_error(e, provider=provider)
|
| 1211 |
+
error_message = str(e).split("\n")[0]
|
| 1212 |
+
|
| 1213 |
+
# Record in accumulator
|
| 1214 |
+
error_accumulator.record_error(
|
| 1215 |
+
current_cred, classified_error, error_message
|
| 1216 |
+
)
|
| 1217 |
+
|
| 1218 |
+
lib_logger.warning(
|
| 1219 |
+
f"Cred {mask_credential(current_cred)} {classified_error.error_type} (HTTP {classified_error.status_code})."
|
| 1220 |
+
)
|
| 1221 |
+
|
| 1222 |
+
# Check if this error should trigger rotation
|
| 1223 |
+
if not should_rotate_on_error(classified_error):
|
| 1224 |
+
lib_logger.error(
|
| 1225 |
+
f"Non-recoverable error ({classified_error.error_type}). Failing."
|
| 1226 |
+
)
|
| 1227 |
+
raise last_exception
|
| 1228 |
+
|
| 1229 |
+
# Handle rate limits with cooldown (exclude quota_exceeded)
|
| 1230 |
+
if (
|
| 1231 |
+
classified_error.status_code == 429
|
| 1232 |
+
and classified_error.error_type != "quota_exceeded"
|
| 1233 |
+
) or classified_error.error_type == "rate_limit":
|
| 1234 |
+
cooldown_duration = classified_error.retry_after or 60
|
| 1235 |
+
await self.cooldown_manager.start_cooldown(
|
| 1236 |
+
provider, cooldown_duration
|
| 1237 |
+
)
|
| 1238 |
+
|
| 1239 |
+
await self.usage_manager.record_failure(
|
| 1240 |
+
current_cred, model, classified_error
|
| 1241 |
+
)
|
| 1242 |
+
break # Rotate to next credential
|
| 1243 |
+
|
| 1244 |
+
# If the inner loop breaks, it means the key failed and we need to rotate.
|
| 1245 |
+
# Continue to the next iteration of the outer while loop to pick a new key.
|
| 1246 |
+
continue
|
| 1247 |
|
| 1248 |
else: # This is the standard API Key / litellm-handled provider logic
|
| 1249 |
is_oauth = provider in self.oauth_providers
|
|
|
|
| 1343 |
if request
|
| 1344 |
else {},
|
| 1345 |
)
|
| 1346 |
+
classified_error = classify_error(e, provider=provider)
|
| 1347 |
|
| 1348 |
# Extract a clean error message for the user-facing log
|
| 1349 |
error_message = str(e).split("\n")[0]
|
|
|
|
| 1387 |
if request
|
| 1388 |
else {},
|
| 1389 |
)
|
| 1390 |
+
classified_error = classify_error(e, provider=provider)
|
| 1391 |
error_message = str(e).split("\n")[0]
|
| 1392 |
|
| 1393 |
# Provider-level error: don't increment consecutive failures
|
|
|
|
| 1443 |
else {},
|
| 1444 |
)
|
| 1445 |
|
| 1446 |
+
classified_error = classify_error(e, provider=provider)
|
| 1447 |
error_message = str(e).split("\n")[0]
|
| 1448 |
|
| 1449 |
lib_logger.warning(
|
|
|
|
| 1512 |
)
|
| 1513 |
raise last_exception
|
| 1514 |
|
| 1515 |
+
classified_error = classify_error(e, provider=provider)
|
| 1516 |
error_message = str(e).split("\n")[0]
|
| 1517 |
|
| 1518 |
lib_logger.warning(
|
|
|
|
| 1839 |
last_exception = e
|
| 1840 |
# If the exception is our custom wrapper, unwrap the original error
|
| 1841 |
original_exc = getattr(e, "data", e)
|
| 1842 |
+
classified_error = classify_error(
|
| 1843 |
+
original_exc, provider=provider
|
| 1844 |
+
)
|
| 1845 |
error_message = str(original_exc).split("\n")[0]
|
| 1846 |
|
| 1847 |
log_failure(
|
|
|
|
| 1898 |
if request
|
| 1899 |
else {},
|
| 1900 |
)
|
| 1901 |
+
classified_error = classify_error(e, provider=provider)
|
| 1902 |
error_message = str(e).split("\n")[0]
|
| 1903 |
|
| 1904 |
# Provider-level error: don't increment consecutive failures
|
|
|
|
| 1948 |
if request
|
| 1949 |
else {},
|
| 1950 |
)
|
| 1951 |
+
classified_error = classify_error(e, provider=provider)
|
| 1952 |
error_message = str(e).split("\n")[0]
|
| 1953 |
|
| 1954 |
# Record in accumulator
|
|
|
|
| 2087 |
cleaned_str = None
|
| 2088 |
# The actual exception might be wrapped in our StreamedAPIError.
|
| 2089 |
original_exc = getattr(e, "data", e)
|
| 2090 |
+
classified_error = classify_error(
|
| 2091 |
+
original_exc, provider=provider
|
| 2092 |
+
)
|
| 2093 |
|
| 2094 |
# Check if this error should trigger rotation
|
| 2095 |
if not should_rotate_on_error(classified_error):
|
|
|
|
| 2216 |
if request
|
| 2217 |
else {},
|
| 2218 |
)
|
| 2219 |
+
classified_error = classify_error(e, provider=provider)
|
| 2220 |
error_message_text = str(e).split("\n")[0]
|
| 2221 |
|
| 2222 |
# Record error in accumulator (server errors are transient, not abnormal)
|
|
|
|
| 2267 |
if request
|
| 2268 |
else {},
|
| 2269 |
)
|
| 2270 |
+
classified_error = classify_error(e, provider=provider)
|
| 2271 |
error_message_text = str(e).split("\n")[0]
|
| 2272 |
|
| 2273 |
# Record error in accumulator
|
|
|
|
| 2509 |
self._model_list_cache[provider] = final_models
|
| 2510 |
return final_models
|
| 2511 |
except Exception as e:
|
| 2512 |
+
classified_error = classify_error(e, provider=provider)
|
| 2513 |
cred_display = mask_credential(credential)
|
| 2514 |
lib_logger.debug(
|
| 2515 |
f"Failed to get models for provider {provider} with credential {cred_display}: {classified_error.error_type}. Trying next credential."
|
src/rotator_library/error_handler.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import re
|
| 2 |
import json
|
| 3 |
import os
|
|
|
|
| 4 |
from typing import Optional, Dict, Any
|
| 5 |
import httpx
|
| 6 |
|
|
@@ -17,6 +18,8 @@ from litellm.exceptions import (
|
|
| 17 |
ContextWindowExceededError,
|
| 18 |
)
|
| 19 |
|
|
|
|
|
|
|
| 20 |
|
| 21 |
def _parse_duration_string(duration_str: str) -> Optional[int]:
|
| 22 |
"""
|
|
@@ -344,14 +347,26 @@ class ClassifiedError:
|
|
| 344 |
original_exception: Exception,
|
| 345 |
status_code: Optional[int] = None,
|
| 346 |
retry_after: Optional[int] = None,
|
|
|
|
| 347 |
):
|
| 348 |
self.error_type = error_type
|
| 349 |
self.original_exception = original_exception
|
| 350 |
self.status_code = status_code
|
| 351 |
self.retry_after = retry_after
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
def __str__(self):
|
| 354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
|
| 357 |
def _extract_retry_from_json_body(json_text: str) -> Optional[int]:
|
|
@@ -513,11 +528,15 @@ def get_retry_after(error: Exception) -> Optional[int]:
|
|
| 513 |
return None
|
| 514 |
|
| 515 |
|
| 516 |
-
def classify_error(e: Exception) -> ClassifiedError:
|
| 517 |
"""
|
| 518 |
Classifies an exception into a structured ClassifiedError object.
|
| 519 |
Now handles both litellm and httpx exceptions.
|
| 520 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 521 |
Error types and their typical handling:
|
| 522 |
- rate_limit (429): Rotate key, may retry with backoff
|
| 523 |
- server_error (5xx): Retry with backoff, then rotate
|
|
@@ -528,7 +547,62 @@ def classify_error(e: Exception) -> ClassifiedError:
|
|
| 528 |
- context_window_exceeded: Don't retry - request too large
|
| 529 |
- api_connection: Retry with backoff, then rotate
|
| 530 |
- unknown: Rotate key (safer to try another)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
status_code = getattr(e, "status_code", None)
|
| 533 |
|
| 534 |
if isinstance(e, httpx.HTTPStatusError): # [NEW] Handle httpx errors first
|
|
|
|
| 1 |
import re
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
+
import logging
|
| 5 |
from typing import Optional, Dict, Any
|
| 6 |
import httpx
|
| 7 |
|
|
|
|
| 18 |
ContextWindowExceededError,
|
| 19 |
)
|
| 20 |
|
| 21 |
+
lib_logger = logging.getLogger("rotator_library")
|
| 22 |
+
|
| 23 |
|
| 24 |
def _parse_duration_string(duration_str: str) -> Optional[int]:
|
| 25 |
"""
|
|
|
|
| 347 |
original_exception: Exception,
|
| 348 |
status_code: Optional[int] = None,
|
| 349 |
retry_after: Optional[int] = None,
|
| 350 |
+
quota_reset_timestamp: Optional[float] = None,
|
| 351 |
):
|
| 352 |
self.error_type = error_type
|
| 353 |
self.original_exception = original_exception
|
| 354 |
self.status_code = status_code
|
| 355 |
self.retry_after = retry_after
|
| 356 |
+
# Unix timestamp when quota resets (from quota_exhausted errors)
|
| 357 |
+
# This is the authoritative reset time parsed from provider's error response
|
| 358 |
+
self.quota_reset_timestamp = quota_reset_timestamp
|
| 359 |
|
| 360 |
def __str__(self):
|
| 361 |
+
parts = [
|
| 362 |
+
f"type={self.error_type}",
|
| 363 |
+
f"status={self.status_code}",
|
| 364 |
+
f"retry_after={self.retry_after}",
|
| 365 |
+
]
|
| 366 |
+
if self.quota_reset_timestamp:
|
| 367 |
+
parts.append(f"quota_reset_ts={self.quota_reset_timestamp}")
|
| 368 |
+
parts.append(f"original_exc={self.original_exception}")
|
| 369 |
+
return f"ClassifiedError({', '.join(parts)})"
|
| 370 |
|
| 371 |
|
| 372 |
def _extract_retry_from_json_body(json_text: str) -> Optional[int]:
|
|
|
|
| 528 |
return None
|
| 529 |
|
| 530 |
|
| 531 |
+
def classify_error(e: Exception, provider: Optional[str] = None) -> ClassifiedError:
|
| 532 |
"""
|
| 533 |
Classifies an exception into a structured ClassifiedError object.
|
| 534 |
Now handles both litellm and httpx exceptions.
|
| 535 |
|
| 536 |
+
If provider is specified and has a parse_quota_error() method,
|
| 537 |
+
attempts provider-specific error parsing first before falling back
|
| 538 |
+
to generic classification.
|
| 539 |
+
|
| 540 |
Error types and their typical handling:
|
| 541 |
- rate_limit (429): Rotate key, may retry with backoff
|
| 542 |
- server_error (5xx): Retry with backoff, then rotate
|
|
|
|
| 547 |
- context_window_exceeded: Don't retry - request too large
|
| 548 |
- api_connection: Retry with backoff, then rotate
|
| 549 |
- unknown: Rotate key (safer to try another)
|
| 550 |
+
|
| 551 |
+
Args:
|
| 552 |
+
e: The exception to classify
|
| 553 |
+
provider: Optional provider name for provider-specific error parsing
|
| 554 |
+
|
| 555 |
+
Returns:
|
| 556 |
+
ClassifiedError with error_type, status_code, retry_after, etc.
|
| 557 |
"""
|
| 558 |
+
# Try provider-specific parsing first for 429/rate limit errors
|
| 559 |
+
if provider:
|
| 560 |
+
try:
|
| 561 |
+
from .providers import PROVIDER_PLUGINS
|
| 562 |
+
|
| 563 |
+
provider_class = PROVIDER_PLUGINS.get(provider)
|
| 564 |
+
|
| 565 |
+
if provider_class and hasattr(provider_class, "parse_quota_error"):
|
| 566 |
+
# Get error body if available
|
| 567 |
+
error_body = None
|
| 568 |
+
if hasattr(e, "response") and hasattr(e.response, "text"):
|
| 569 |
+
try:
|
| 570 |
+
error_body = e.response.text
|
| 571 |
+
except Exception:
|
| 572 |
+
pass
|
| 573 |
+
elif hasattr(e, "body"):
|
| 574 |
+
error_body = str(e.body)
|
| 575 |
+
|
| 576 |
+
quota_info = provider_class.parse_quota_error(e, error_body)
|
| 577 |
+
|
| 578 |
+
if quota_info and quota_info.get("retry_after"):
|
| 579 |
+
retry_after = quota_info["retry_after"]
|
| 580 |
+
reason = quota_info.get("reason", "QUOTA_EXHAUSTED")
|
| 581 |
+
reset_ts = quota_info.get("reset_timestamp")
|
| 582 |
+
quota_reset_timestamp = quota_info.get("quota_reset_timestamp")
|
| 583 |
+
|
| 584 |
+
# Log the parsed result with human-readable duration
|
| 585 |
+
hours = retry_after / 3600
|
| 586 |
+
lib_logger.info(
|
| 587 |
+
f"Provider '{provider}' parsed quota error: "
|
| 588 |
+
f"retry_after={retry_after}s ({hours:.1f}h), reason={reason}"
|
| 589 |
+
+ (f", resets at {reset_ts}" if reset_ts else "")
|
| 590 |
+
)
|
| 591 |
+
|
| 592 |
+
return ClassifiedError(
|
| 593 |
+
error_type="quota_exceeded",
|
| 594 |
+
original_exception=e,
|
| 595 |
+
status_code=429,
|
| 596 |
+
retry_after=retry_after,
|
| 597 |
+
quota_reset_timestamp=quota_reset_timestamp,
|
| 598 |
+
)
|
| 599 |
+
except Exception as parse_error:
|
| 600 |
+
lib_logger.debug(
|
| 601 |
+
f"Provider-specific error parsing failed for '{provider}': {parse_error}"
|
| 602 |
+
)
|
| 603 |
+
# Fall through to generic classification
|
| 604 |
+
|
| 605 |
+
# Generic classification logic
|
| 606 |
status_code = getattr(e, "status_code", None)
|
| 607 |
|
| 608 |
if isinstance(e, httpx.HTTPStatusError): # [NEW] Handle httpx errors first
|
src/rotator_library/providers/antigravity_provider.py
CHANGED
|
@@ -34,7 +34,7 @@ from urllib.parse import urlparse
|
|
| 34 |
import httpx
|
| 35 |
import litellm
|
| 36 |
|
| 37 |
-
from .provider_interface import ProviderInterface
|
| 38 |
from .antigravity_auth_base import AntigravityAuthBase
|
| 39 |
from .provider_cache import ProviderCache
|
| 40 |
from ..model_definitions import ModelDefinitions
|
|
@@ -50,7 +50,7 @@ lib_logger = logging.getLogger("rotator_library")
|
|
| 50 |
# Priority: daily (sandbox) → autopush (sandbox) → production
|
| 51 |
BASE_URLS = [
|
| 52 |
"https://daily-cloudcode-pa.sandbox.googleapis.com/v1internal",
|
| 53 |
-
"https://autopush-cloudcode-pa.sandbox.googleapis.com/v1internal",
|
| 54 |
"https://cloudcode-pa.googleapis.com/v1internal", # Production fallback
|
| 55 |
]
|
| 56 |
|
|
@@ -494,6 +494,227 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 494 |
|
| 495 |
skip_cost_calculation = True
|
| 496 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
def __init__(self):
|
| 498 |
super().__init__()
|
| 499 |
self.model_definitions = ModelDefinitions()
|
|
@@ -577,43 +798,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 577 |
f"claude_fix={self._enable_claude_tool_fix}, thinking_sanitization={self._enable_thinking_sanitization}"
|
| 578 |
)
|
| 579 |
|
| 580 |
-
# =========================================================================
|
| 581 |
-
# CREDENTIAL PRIORITIZATION
|
| 582 |
-
# =========================================================================
|
| 583 |
-
|
| 584 |
-
def get_credential_priority(self, credential: str) -> Optional[int]:
|
| 585 |
-
"""
|
| 586 |
-
Returns priority based on Antigravity tier.
|
| 587 |
-
Paid tiers: priority 1 (highest)
|
| 588 |
-
Free tier: priority 2
|
| 589 |
-
Legacy/Unknown: priority 10 (lowest)
|
| 590 |
-
|
| 591 |
-
Args:
|
| 592 |
-
credential: The credential path
|
| 593 |
-
|
| 594 |
-
Returns:
|
| 595 |
-
Priority level (1-10) or None if tier not yet discovered
|
| 596 |
-
"""
|
| 597 |
-
tier = self.project_tier_cache.get(credential)
|
| 598 |
-
|
| 599 |
-
# Lazy load from file if not in cache
|
| 600 |
-
if not tier:
|
| 601 |
-
tier = self._load_tier_from_file(credential)
|
| 602 |
-
|
| 603 |
-
if not tier:
|
| 604 |
-
return None # Not yet discovered
|
| 605 |
-
|
| 606 |
-
# Paid tiers get highest priority
|
| 607 |
-
if tier not in ["free-tier", "legacy-tier", "unknown"]:
|
| 608 |
-
return 1
|
| 609 |
-
|
| 610 |
-
# Free tier gets lower priority
|
| 611 |
-
if tier == "free-tier":
|
| 612 |
-
return 2
|
| 613 |
-
|
| 614 |
-
# Legacy and unknown get even lower
|
| 615 |
-
return 10
|
| 616 |
-
|
| 617 |
def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
|
| 618 |
"""
|
| 619 |
Load tier from credential file's _proxy_metadata and cache it.
|
|
@@ -2375,9 +2559,9 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 2375 |
f"Ignoring duplicate - this may indicate malformed conversation history."
|
| 2376 |
)
|
| 2377 |
continue
|
| 2378 |
-
lib_logger.debug(
|
| 2379 |
-
|
| 2380 |
-
)
|
| 2381 |
collected_responses[resp_id] = resp
|
| 2382 |
|
| 2383 |
# Try to satisfy pending groups (newest first)
|
|
@@ -2392,10 +2576,10 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 2392 |
collected_responses.pop(gid) for gid in group_ids
|
| 2393 |
]
|
| 2394 |
new_contents.append({"parts": group_responses, "role": "user"})
|
| 2395 |
-
lib_logger.debug(
|
| 2396 |
-
|
| 2397 |
-
|
| 2398 |
-
)
|
| 2399 |
pending_groups.pop(i)
|
| 2400 |
break
|
| 2401 |
continue
|
|
@@ -2415,10 +2599,10 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 2415 |
]
|
| 2416 |
|
| 2417 |
if call_ids:
|
| 2418 |
-
lib_logger.debug(
|
| 2419 |
-
|
| 2420 |
-
|
| 2421 |
-
)
|
| 2422 |
pending_groups.append(
|
| 2423 |
{
|
| 2424 |
"ids": call_ids,
|
|
@@ -3450,7 +3634,28 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 3450 |
return await self._handle_non_streaming(
|
| 3451 |
client, url, headers, payload, model, file_logger
|
| 3452 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3453 |
except Exception as e:
|
|
|
|
| 3454 |
if self._try_next_base_url():
|
| 3455 |
lib_logger.warning(f"Retrying with fallback URL: {e}")
|
| 3456 |
url = f"{self._get_base_url()}{endpoint}"
|
|
@@ -3534,11 +3739,13 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 3534 |
"POST", url, headers=headers, json=payload, timeout=600.0
|
| 3535 |
) as response:
|
| 3536 |
if response.status_code >= 400:
|
|
|
|
|
|
|
| 3537 |
try:
|
| 3538 |
-
|
| 3539 |
-
lib_logger.error(
|
| 3540 |
-
|
| 3541 |
-
)
|
| 3542 |
except Exception:
|
| 3543 |
pass
|
| 3544 |
|
|
|
|
| 34 |
import httpx
|
| 35 |
import litellm
|
| 36 |
|
| 37 |
+
from .provider_interface import ProviderInterface, UsageResetConfigDef, QuotaGroupMap
|
| 38 |
from .antigravity_auth_base import AntigravityAuthBase
|
| 39 |
from .provider_cache import ProviderCache
|
| 40 |
from ..model_definitions import ModelDefinitions
|
|
|
|
| 50 |
# Priority: daily (sandbox) → autopush (sandbox) → production
|
| 51 |
BASE_URLS = [
|
| 52 |
"https://daily-cloudcode-pa.sandbox.googleapis.com/v1internal",
|
| 53 |
+
# "https://autopush-cloudcode-pa.sandbox.googleapis.com/v1internal",
|
| 54 |
"https://cloudcode-pa.googleapis.com/v1internal", # Production fallback
|
| 55 |
]
|
| 56 |
|
|
|
|
| 494 |
|
| 495 |
skip_cost_calculation = True
|
| 496 |
|
| 497 |
+
# Sequential mode by default - preserves thinking signature caches between requests
|
| 498 |
+
default_rotation_mode: str = "sequential"
|
| 499 |
+
|
| 500 |
+
# =========================================================================
|
| 501 |
+
# TIER & USAGE CONFIGURATION
|
| 502 |
+
# =========================================================================
|
| 503 |
+
|
| 504 |
+
# Provider name for env var lookups (QUOTA_GROUPS_ANTIGRAVITY_*)
|
| 505 |
+
provider_env_name: str = "antigravity"
|
| 506 |
+
|
| 507 |
+
# Tier name -> priority mapping (Single Source of Truth)
|
| 508 |
+
# Lower numbers = higher priority
|
| 509 |
+
tier_priorities = {
|
| 510 |
+
# Priority 1: Highest paid tier (Google AI Ultra - name unconfirmed)
|
| 511 |
+
# "google-ai-ultra": 1, # Uncomment when tier name is confirmed
|
| 512 |
+
# Priority 2: Standard paid tier
|
| 513 |
+
"standard-tier": 2,
|
| 514 |
+
# Priority 3: Free tier
|
| 515 |
+
"free-tier": 3,
|
| 516 |
+
# Priority 10: Legacy/Unknown (lowest)
|
| 517 |
+
"legacy-tier": 10,
|
| 518 |
+
"unknown": 10,
|
| 519 |
+
}
|
| 520 |
+
|
| 521 |
+
# Default priority for tiers not in the mapping
|
| 522 |
+
default_tier_priority: int = 10
|
| 523 |
+
|
| 524 |
+
# Usage reset configs keyed by priority sets
|
| 525 |
+
# Priorities 1-2 (paid tiers) get 5h window, others get 7d window
|
| 526 |
+
usage_reset_configs = {
|
| 527 |
+
frozenset({1, 2}): UsageResetConfigDef(
|
| 528 |
+
window_seconds=5 * 60 * 60, # 5 hours
|
| 529 |
+
mode="per_model",
|
| 530 |
+
description="5-hour per-model window (paid tier)",
|
| 531 |
+
field_name="models",
|
| 532 |
+
),
|
| 533 |
+
"default": UsageResetConfigDef(
|
| 534 |
+
window_seconds=7 * 24 * 60 * 60, # 7 days
|
| 535 |
+
mode="per_model",
|
| 536 |
+
description="7-day per-model window (free/unknown tier)",
|
| 537 |
+
field_name="models",
|
| 538 |
+
),
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
# Model quota groups (can be overridden via QUOTA_GROUPS_ANTIGRAVITY_CLAUDE)
|
| 542 |
+
# Models in the same group share quota - when one is exhausted, all are
|
| 543 |
+
model_quota_groups: QuotaGroupMap = {
|
| 544 |
+
"claude": ["claude-sonnet-4-5", "claude-opus-4-5"],
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
+
# Model usage weights for grouped usage calculation
|
| 548 |
+
# Opus consumes more quota per request, so its usage counts 2x when
|
| 549 |
+
# comparing credentials for selection
|
| 550 |
+
model_usage_weights = {
|
| 551 |
+
"claude-opus-4-5": 2,
|
| 552 |
+
}
|
| 553 |
+
|
| 554 |
+
# Priority-based concurrency multipliers
|
| 555 |
+
# Higher priority credentials (lower number) get higher multipliers
|
| 556 |
+
# Priority 1 (paid ultra): 5x concurrent requests
|
| 557 |
+
# Priority 2 (standard paid): 3x concurrent requests
|
| 558 |
+
# Others: Use sequential fallback (2x) or balanced default (1x)
|
| 559 |
+
default_priority_multipliers = {1: 5, 2: 3}
|
| 560 |
+
|
| 561 |
+
# For sequential mode, lower priority tiers still get 2x to maintain stickiness
|
| 562 |
+
# For balanced mode, this doesn't apply (falls back to 1x)
|
| 563 |
+
default_sequential_fallback_multiplier = 2
|
| 564 |
+
|
| 565 |
+
@staticmethod
|
| 566 |
+
def parse_quota_error(
|
| 567 |
+
error: Exception, error_body: Optional[str] = None
|
| 568 |
+
) -> Optional[Dict[str, Any]]:
|
| 569 |
+
"""
|
| 570 |
+
Parse Antigravity/Google RPC quota errors.
|
| 571 |
+
|
| 572 |
+
Handles the Google Cloud API error format with ErrorInfo and RetryInfo details.
|
| 573 |
+
|
| 574 |
+
Example error format:
|
| 575 |
+
{
|
| 576 |
+
"error": {
|
| 577 |
+
"code": 429,
|
| 578 |
+
"details": [
|
| 579 |
+
{
|
| 580 |
+
"@type": "type.googleapis.com/google.rpc.ErrorInfo",
|
| 581 |
+
"reason": "QUOTA_EXHAUSTED",
|
| 582 |
+
"metadata": {
|
| 583 |
+
"quotaResetDelay": "143h4m52.730699158s",
|
| 584 |
+
"quotaResetTimeStamp": "2025-12-11T22:53:16Z"
|
| 585 |
+
}
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"@type": "type.googleapis.com/google.rpc.RetryInfo",
|
| 589 |
+
"retryDelay": "515092.730699158s"
|
| 590 |
+
}
|
| 591 |
+
]
|
| 592 |
+
}
|
| 593 |
+
}
|
| 594 |
+
|
| 595 |
+
Args:
|
| 596 |
+
error: The caught exception
|
| 597 |
+
error_body: Optional raw response body string
|
| 598 |
+
|
| 599 |
+
Returns:
|
| 600 |
+
None if not a parseable quota error, otherwise:
|
| 601 |
+
{
|
| 602 |
+
"retry_after": int,
|
| 603 |
+
"reason": str,
|
| 604 |
+
"reset_timestamp": str | None,
|
| 605 |
+
}
|
| 606 |
+
"""
|
| 607 |
+
import re as regex_module
|
| 608 |
+
|
| 609 |
+
def parse_duration(duration_str: str) -> Optional[int]:
|
| 610 |
+
"""Parse duration strings like '143h4m52.73s' or '515092.73s' to seconds."""
|
| 611 |
+
if not duration_str:
|
| 612 |
+
return None
|
| 613 |
+
|
| 614 |
+
# Handle pure seconds format: "515092.730699158s"
|
| 615 |
+
pure_seconds_match = regex_module.match(r"^([\d.]+)s$", duration_str)
|
| 616 |
+
if pure_seconds_match:
|
| 617 |
+
return int(float(pure_seconds_match.group(1)))
|
| 618 |
+
|
| 619 |
+
# Handle compound format: "143h4m52.730699158s"
|
| 620 |
+
total_seconds = 0
|
| 621 |
+
patterns = [
|
| 622 |
+
(r"(\d+)h", 3600), # hours
|
| 623 |
+
(r"(\d+)m", 60), # minutes
|
| 624 |
+
(r"([\d.]+)s", 1), # seconds
|
| 625 |
+
]
|
| 626 |
+
for pattern, multiplier in patterns:
|
| 627 |
+
match = regex_module.search(pattern, duration_str)
|
| 628 |
+
if match:
|
| 629 |
+
total_seconds += float(match.group(1)) * multiplier
|
| 630 |
+
|
| 631 |
+
return int(total_seconds) if total_seconds > 0 else None
|
| 632 |
+
|
| 633 |
+
# Get error body from exception if not provided
|
| 634 |
+
body = error_body
|
| 635 |
+
if not body:
|
| 636 |
+
# Try to extract from various exception attributes
|
| 637 |
+
if hasattr(error, "response") and hasattr(error.response, "text"):
|
| 638 |
+
body = error.response.text
|
| 639 |
+
elif hasattr(error, "body"):
|
| 640 |
+
body = str(error.body)
|
| 641 |
+
elif hasattr(error, "message"):
|
| 642 |
+
body = str(error.message)
|
| 643 |
+
else:
|
| 644 |
+
body = str(error)
|
| 645 |
+
|
| 646 |
+
# Try to find JSON in the body
|
| 647 |
+
try:
|
| 648 |
+
# Handle cases where JSON is embedded in a larger string
|
| 649 |
+
json_match = regex_module.search(r"\{[\s\S]*\}", body)
|
| 650 |
+
if not json_match:
|
| 651 |
+
return None
|
| 652 |
+
|
| 653 |
+
data = json.loads(json_match.group(0))
|
| 654 |
+
except (json.JSONDecodeError, AttributeError, TypeError):
|
| 655 |
+
return None
|
| 656 |
+
|
| 657 |
+
# Navigate to error.details
|
| 658 |
+
error_obj = data.get("error", data)
|
| 659 |
+
details = error_obj.get("details", [])
|
| 660 |
+
|
| 661 |
+
if not details:
|
| 662 |
+
return None
|
| 663 |
+
|
| 664 |
+
result = {
|
| 665 |
+
"retry_after": None,
|
| 666 |
+
"reason": None,
|
| 667 |
+
"reset_timestamp": None,
|
| 668 |
+
"quota_reset_timestamp": None, # Unix timestamp for quota reset
|
| 669 |
+
}
|
| 670 |
+
|
| 671 |
+
for detail in details:
|
| 672 |
+
detail_type = detail.get("@type", "")
|
| 673 |
+
|
| 674 |
+
# Parse RetryInfo - most authoritative source for retry delay
|
| 675 |
+
if "RetryInfo" in detail_type:
|
| 676 |
+
retry_delay = detail.get("retryDelay")
|
| 677 |
+
if retry_delay:
|
| 678 |
+
parsed = parse_duration(retry_delay)
|
| 679 |
+
if parsed:
|
| 680 |
+
result["retry_after"] = parsed
|
| 681 |
+
|
| 682 |
+
# Parse ErrorInfo - contains reason and quota reset metadata
|
| 683 |
+
elif "ErrorInfo" in detail_type:
|
| 684 |
+
result["reason"] = detail.get("reason")
|
| 685 |
+
metadata = detail.get("metadata", {})
|
| 686 |
+
|
| 687 |
+
# Get quotaResetDelay as fallback if RetryInfo not present
|
| 688 |
+
if not result["retry_after"]:
|
| 689 |
+
quota_delay = metadata.get("quotaResetDelay")
|
| 690 |
+
if quota_delay:
|
| 691 |
+
parsed = parse_duration(quota_delay)
|
| 692 |
+
if parsed:
|
| 693 |
+
result["retry_after"] = parsed
|
| 694 |
+
|
| 695 |
+
# Capture reset timestamp for logging and authoritative reset time
|
| 696 |
+
reset_ts_str = metadata.get("quotaResetTimeStamp")
|
| 697 |
+
result["reset_timestamp"] = reset_ts_str
|
| 698 |
+
|
| 699 |
+
# Parse ISO timestamp to Unix timestamp for usage tracking
|
| 700 |
+
if reset_ts_str:
|
| 701 |
+
try:
|
| 702 |
+
# Handle ISO format: "2025-12-11T22:53:16Z"
|
| 703 |
+
reset_dt = datetime.fromisoformat(
|
| 704 |
+
reset_ts_str.replace("Z", "+00:00")
|
| 705 |
+
)
|
| 706 |
+
result["quota_reset_timestamp"] = reset_dt.timestamp()
|
| 707 |
+
except (ValueError, AttributeError) as e:
|
| 708 |
+
lib_logger.warning(
|
| 709 |
+
f"Failed to parse quota reset timestamp '{reset_ts_str}': {e}"
|
| 710 |
+
)
|
| 711 |
+
|
| 712 |
+
# Return None if we couldn't extract retry_after
|
| 713 |
+
if not result["retry_after"]:
|
| 714 |
+
return None
|
| 715 |
+
|
| 716 |
+
return result
|
| 717 |
+
|
| 718 |
def __init__(self):
|
| 719 |
super().__init__()
|
| 720 |
self.model_definitions = ModelDefinitions()
|
|
|
|
| 798 |
f"claude_fix={self._enable_claude_tool_fix}, thinking_sanitization={self._enable_thinking_sanitization}"
|
| 799 |
)
|
| 800 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 801 |
def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
|
| 802 |
"""
|
| 803 |
Load tier from credential file's _proxy_metadata and cache it.
|
|
|
|
| 2559 |
f"Ignoring duplicate - this may indicate malformed conversation history."
|
| 2560 |
)
|
| 2561 |
continue
|
| 2562 |
+
#lib_logger.debug(
|
| 2563 |
+
# f"[Grouping] Collected response for ID: {resp_id}"
|
| 2564 |
+
#)
|
| 2565 |
collected_responses[resp_id] = resp
|
| 2566 |
|
| 2567 |
# Try to satisfy pending groups (newest first)
|
|
|
|
| 2576 |
collected_responses.pop(gid) for gid in group_ids
|
| 2577 |
]
|
| 2578 |
new_contents.append({"parts": group_responses, "role": "user"})
|
| 2579 |
+
#lib_logger.debug(
|
| 2580 |
+
# f"[Grouping] Satisfied group with {len(group_responses)} responses: "
|
| 2581 |
+
# f"ids={group_ids}"
|
| 2582 |
+
#)
|
| 2583 |
pending_groups.pop(i)
|
| 2584 |
break
|
| 2585 |
continue
|
|
|
|
| 2599 |
]
|
| 2600 |
|
| 2601 |
if call_ids:
|
| 2602 |
+
#lib_logger.debug(
|
| 2603 |
+
# f"[Grouping] Created pending group expecting {len(call_ids)} responses: "
|
| 2604 |
+
# f"ids={call_ids}, names={func_names}"
|
| 2605 |
+
#)
|
| 2606 |
pending_groups.append(
|
| 2607 |
{
|
| 2608 |
"ids": call_ids,
|
|
|
|
| 3634 |
return await self._handle_non_streaming(
|
| 3635 |
client, url, headers, payload, model, file_logger
|
| 3636 |
)
|
| 3637 |
+
except httpx.HTTPStatusError as e:
|
| 3638 |
+
# 429 = Rate limit/quota exhausted - tied to credential, not URL
|
| 3639 |
+
# Do NOT retry on different URL, just raise immediately
|
| 3640 |
+
if e.response.status_code == 429:
|
| 3641 |
+
lib_logger.debug(f"429 quota error - not retrying on fallback URL: {e}")
|
| 3642 |
+
raise
|
| 3643 |
+
|
| 3644 |
+
# For other HTTP errors (403, 500, etc.), try fallback URL
|
| 3645 |
+
if self._try_next_base_url():
|
| 3646 |
+
lib_logger.warning(f"Retrying with fallback URL: {e}")
|
| 3647 |
+
url = f"{self._get_base_url()}{endpoint}"
|
| 3648 |
+
if stream:
|
| 3649 |
+
return self._handle_streaming(
|
| 3650 |
+
client, url, headers, payload, model, file_logger
|
| 3651 |
+
)
|
| 3652 |
+
else:
|
| 3653 |
+
return await self._handle_non_streaming(
|
| 3654 |
+
client, url, headers, payload, model, file_logger
|
| 3655 |
+
)
|
| 3656 |
+
raise
|
| 3657 |
except Exception as e:
|
| 3658 |
+
# Non-HTTP errors (network issues, timeouts, etc.) - try fallback URL
|
| 3659 |
if self._try_next_base_url():
|
| 3660 |
lib_logger.warning(f"Retrying with fallback URL: {e}")
|
| 3661 |
url = f"{self._get_base_url()}{endpoint}"
|
|
|
|
| 3739 |
"POST", url, headers=headers, json=payload, timeout=600.0
|
| 3740 |
) as response:
|
| 3741 |
if response.status_code >= 400:
|
| 3742 |
+
# Read error body for raise_for_status to include in exception
|
| 3743 |
+
# Terminal logging commented out - errors are logged in failures.log
|
| 3744 |
try:
|
| 3745 |
+
await response.aread()
|
| 3746 |
+
# lib_logger.error(
|
| 3747 |
+
# f"API error {response.status_code}: {error_body.decode()}"
|
| 3748 |
+
# )
|
| 3749 |
except Exception:
|
| 3750 |
pass
|
| 3751 |
|
src/rotator_library/providers/gemini_cli_provider.py
CHANGED
|
@@ -186,6 +186,71 @@ def _env_int(key: str, default: int) -> int:
|
|
| 186 |
class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
| 187 |
skip_cost_calculation = True
|
| 188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
def __init__(self):
|
| 190 |
super().__init__()
|
| 191 |
self.model_definitions = ModelDefinitions()
|
|
@@ -239,41 +304,13 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 239 |
)
|
| 240 |
|
| 241 |
# =========================================================================
|
| 242 |
-
# CREDENTIAL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
# =========================================================================
|
| 244 |
-
|
| 245 |
-
def get_credential_priority(self, credential: str) -> Optional[int]:
|
| 246 |
-
"""
|
| 247 |
-
Returns priority based on Gemini tier.
|
| 248 |
-
Paid tiers: priority 1 (highest)
|
| 249 |
-
Free/Legacy tiers: priority 2
|
| 250 |
-
Unknown: priority 10 (lowest)
|
| 251 |
-
|
| 252 |
-
Args:
|
| 253 |
-
credential: The credential path
|
| 254 |
-
|
| 255 |
-
Returns:
|
| 256 |
-
Priority level (1-10) or None if tier not yet discovered
|
| 257 |
-
"""
|
| 258 |
-
tier = self.project_tier_cache.get(credential)
|
| 259 |
-
|
| 260 |
-
# Lazy load from file if not in cache
|
| 261 |
-
if not tier:
|
| 262 |
-
tier = self._load_tier_from_file(credential)
|
| 263 |
-
|
| 264 |
-
if not tier:
|
| 265 |
-
return None # Not yet discovered
|
| 266 |
-
|
| 267 |
-
# Paid tiers get highest priority
|
| 268 |
-
if tier not in ["free-tier", "legacy-tier", "unknown"]:
|
| 269 |
-
return 1
|
| 270 |
-
|
| 271 |
-
# Free tier gets lower priority
|
| 272 |
-
if tier == "free-tier":
|
| 273 |
-
return 2
|
| 274 |
-
|
| 275 |
-
# Legacy and unknown get even lower
|
| 276 |
-
return 10
|
| 277 |
|
| 278 |
def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
|
| 279 |
"""
|
|
|
|
| 186 |
class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
| 187 |
skip_cost_calculation = True
|
| 188 |
|
| 189 |
+
# Balanced by default - Gemini CLI has short cooldowns (seconds, not hours)
|
| 190 |
+
default_rotation_mode: str = "balanced"
|
| 191 |
+
|
| 192 |
+
# =========================================================================
|
| 193 |
+
# TIER CONFIGURATION
|
| 194 |
+
# =========================================================================
|
| 195 |
+
|
| 196 |
+
# Provider name for env var lookups (QUOTA_GROUPS_GEMINI_CLI_*)
|
| 197 |
+
provider_env_name: str = "gemini_cli"
|
| 198 |
+
|
| 199 |
+
# Tier name -> priority mapping (Single Source of Truth)
|
| 200 |
+
# Same tier names as Antigravity (coincidentally), but defined separately
|
| 201 |
+
tier_priorities = {
|
| 202 |
+
# Priority 1: Highest paid tier (Google AI Ultra - name unconfirmed)
|
| 203 |
+
# "google-ai-ultra": 1, # Uncomment when tier name is confirmed
|
| 204 |
+
# Priority 2: Standard paid tier
|
| 205 |
+
"standard-tier": 2,
|
| 206 |
+
# Priority 3: Free tier
|
| 207 |
+
"free-tier": 3,
|
| 208 |
+
# Priority 10: Legacy/Unknown (lowest)
|
| 209 |
+
"legacy-tier": 10,
|
| 210 |
+
"unknown": 10,
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
# Default priority for tiers not in the mapping
|
| 214 |
+
default_tier_priority: int = 10
|
| 215 |
+
|
| 216 |
+
# Gemini CLI uses default daily reset - no custom usage_reset_configs
|
| 217 |
+
# (Empty dict means inherited get_usage_reset_config returns None)
|
| 218 |
+
|
| 219 |
+
# No quota groups defined for Gemini CLI
|
| 220 |
+
# (Models don't share quotas)
|
| 221 |
+
|
| 222 |
+
# Priority-based concurrency multipliers
|
| 223 |
+
# Same structure as Antigravity (by coincidence, tiers share naming)
|
| 224 |
+
# Priority 1 (paid ultra): 5x concurrent requests
|
| 225 |
+
# Priority 2 (standard paid): 3x concurrent requests
|
| 226 |
+
# Others: 1x (no sequential fallback, uses global default)
|
| 227 |
+
default_priority_multipliers = {1: 5, 2: 3}
|
| 228 |
+
|
| 229 |
+
# No sequential fallback for Gemini CLI (uses balanced mode default)
|
| 230 |
+
# default_sequential_fallback_multiplier = 1 (inherited from ProviderInterface)
|
| 231 |
+
|
| 232 |
+
@staticmethod
|
| 233 |
+
def parse_quota_error(
|
| 234 |
+
error: Exception, error_body: Optional[str] = None
|
| 235 |
+
) -> Optional[Dict[str, Any]]:
|
| 236 |
+
"""
|
| 237 |
+
Parse Gemini CLI quota errors.
|
| 238 |
+
|
| 239 |
+
Uses the same Google RPC format as Antigravity but typically has
|
| 240 |
+
much shorter cooldown durations (seconds to minutes, not hours).
|
| 241 |
+
|
| 242 |
+
Args:
|
| 243 |
+
error: The caught exception
|
| 244 |
+
error_body: Optional raw response body string
|
| 245 |
+
|
| 246 |
+
Returns:
|
| 247 |
+
Same format as AntigravityProvider.parse_quota_error()
|
| 248 |
+
"""
|
| 249 |
+
# Reuse the same parsing logic as Antigravity since both use Google RPC format
|
| 250 |
+
from .antigravity_provider import AntigravityProvider
|
| 251 |
+
|
| 252 |
+
return AntigravityProvider.parse_quota_error(error, error_body)
|
| 253 |
+
|
| 254 |
def __init__(self):
|
| 255 |
super().__init__()
|
| 256 |
self.model_definitions = ModelDefinitions()
|
|
|
|
| 304 |
)
|
| 305 |
|
| 306 |
# =========================================================================
|
| 307 |
+
# CREDENTIAL TIER LOOKUP (Provider-specific - uses cache)
|
| 308 |
+
# =========================================================================
|
| 309 |
+
#
|
| 310 |
+
# NOTE: get_credential_priority() is now inherited from ProviderInterface.
|
| 311 |
+
# It uses get_credential_tier_name() to get the tier and resolve priority
|
| 312 |
+
# from the tier_priorities class attribute.
|
| 313 |
# =========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
|
| 316 |
"""
|
src/rotator_library/providers/provider_interface.py
CHANGED
|
@@ -1,9 +1,46 @@
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
-
from
|
|
|
|
|
|
|
| 3 |
import httpx
|
| 4 |
import litellm
|
| 5 |
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
class ProviderInterface(ABC):
|
| 8 |
"""
|
| 9 |
An interface for API provider-specific functionality, including model
|
|
@@ -12,6 +49,69 @@ class ProviderInterface(ABC):
|
|
| 12 |
|
| 13 |
skip_cost_calculation: bool = False
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
@abstractmethod
|
| 16 |
async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
|
| 17 |
"""
|
|
@@ -81,28 +181,50 @@ class ProviderInterface(ABC):
|
|
| 81 |
pass
|
| 82 |
|
| 83 |
# [NEW] Credential Prioritization System
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
def get_credential_priority(self, credential: str) -> Optional[int]:
|
| 85 |
"""
|
| 86 |
Returns the priority level for a credential.
|
| 87 |
Lower numbers = higher priority (1 is highest).
|
| 88 |
-
Returns None if
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
Args:
|
| 94 |
credential: The credential identifier (API key or path)
|
| 95 |
|
| 96 |
Returns:
|
| 97 |
-
Priority level (1-10) or None if
|
| 98 |
-
|
| 99 |
-
Example:
|
| 100 |
-
For Gemini CLI:
|
| 101 |
-
- Paid tier credentials: priority 1 (highest)
|
| 102 |
-
- Free tier credentials: priority 2
|
| 103 |
-
- Unknown tier: priority 10 (lowest)
|
| 104 |
"""
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
def get_model_tier_requirement(self, model: str) -> Optional[int]:
|
| 108 |
"""
|
|
@@ -153,3 +275,274 @@ class ProviderInterface(ABC):
|
|
| 153 |
Tier name string (e.g., "free-tier", "paid-tier") or None if unknown
|
| 154 |
"""
|
| 155 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
from typing import List, Dict, Any, Optional, AsyncGenerator, Union, FrozenSet
|
| 4 |
+
import os
|
| 5 |
import httpx
|
| 6 |
import litellm
|
| 7 |
|
| 8 |
|
| 9 |
+
# =============================================================================
|
| 10 |
+
# TIER & USAGE CONFIGURATION TYPES
|
| 11 |
+
# =============================================================================
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@dataclass(frozen=True)
|
| 15 |
+
class UsageResetConfigDef:
|
| 16 |
+
"""
|
| 17 |
+
Definition for usage reset configuration per tier type.
|
| 18 |
+
|
| 19 |
+
Providers define these as class attributes to specify how usage stats
|
| 20 |
+
should reset based on credential tier (paid vs free).
|
| 21 |
+
|
| 22 |
+
Attributes:
|
| 23 |
+
window_seconds: Duration of the usage tracking window in seconds.
|
| 24 |
+
mode: Either "credential" (one window per credential) or "per_model"
|
| 25 |
+
(separate window per model or model group).
|
| 26 |
+
description: Human-readable description for logging.
|
| 27 |
+
field_name: The key used in usage data JSON structure.
|
| 28 |
+
Typically "models" for per_model mode, "daily" for credential mode.
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
window_seconds: int
|
| 32 |
+
mode: str # "credential" or "per_model"
|
| 33 |
+
description: str
|
| 34 |
+
field_name: str = "daily" # Default for backwards compatibility
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# Type aliases for provider configuration
|
| 38 |
+
TierPriorityMap = Dict[str, int] # tier_name -> priority
|
| 39 |
+
UsageConfigKey = Union[FrozenSet[int], str] # frozenset of priorities OR "default"
|
| 40 |
+
UsageConfigMap = Dict[UsageConfigKey, UsageResetConfigDef] # priority_set -> config
|
| 41 |
+
QuotaGroupMap = Dict[str, List[str]] # group_name -> [models]
|
| 42 |
+
|
| 43 |
+
|
| 44 |
class ProviderInterface(ABC):
|
| 45 |
"""
|
| 46 |
An interface for API provider-specific functionality, including model
|
|
|
|
| 49 |
|
| 50 |
skip_cost_calculation: bool = False
|
| 51 |
|
| 52 |
+
# Default rotation mode for this provider ("balanced" or "sequential")
|
| 53 |
+
# - "balanced": Rotate credentials to distribute load evenly
|
| 54 |
+
# - "sequential": Use one credential until exhausted, then switch to next
|
| 55 |
+
default_rotation_mode: str = "balanced"
|
| 56 |
+
|
| 57 |
+
# =========================================================================
|
| 58 |
+
# TIER CONFIGURATION - Override in subclass
|
| 59 |
+
# =========================================================================
|
| 60 |
+
|
| 61 |
+
# Provider name for env var lookups (e.g., "antigravity", "gemini_cli")
|
| 62 |
+
# Used for: QUOTA_GROUPS_{provider_env_name}_{GROUP}
|
| 63 |
+
provider_env_name: str = ""
|
| 64 |
+
|
| 65 |
+
# Tier name -> priority mapping (Single Source of Truth)
|
| 66 |
+
# Lower numbers = higher priority (1 is highest)
|
| 67 |
+
# Multiple tiers can map to the same priority
|
| 68 |
+
# Unknown tiers fall back to default_tier_priority
|
| 69 |
+
tier_priorities: TierPriorityMap = {}
|
| 70 |
+
|
| 71 |
+
# Default priority for tiers not in tier_priorities mapping
|
| 72 |
+
default_tier_priority: int = 10
|
| 73 |
+
|
| 74 |
+
# =========================================================================
|
| 75 |
+
# USAGE RESET CONFIGURATION - Override in subclass
|
| 76 |
+
# =========================================================================
|
| 77 |
+
|
| 78 |
+
# Usage reset configurations keyed by priority sets
|
| 79 |
+
# Keys: frozenset of priority values (e.g., frozenset({1, 2})) OR "default"
|
| 80 |
+
# The "default" key is used for any priority not matched by a frozenset
|
| 81 |
+
usage_reset_configs: UsageConfigMap = {}
|
| 82 |
+
|
| 83 |
+
# =========================================================================
|
| 84 |
+
# MODEL QUOTA GROUPS - Override in subclass
|
| 85 |
+
# =========================================================================
|
| 86 |
+
|
| 87 |
+
# Models that share quota/cooldown timing
|
| 88 |
+
# Can be overridden via env: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
|
| 89 |
+
model_quota_groups: QuotaGroupMap = {}
|
| 90 |
+
|
| 91 |
+
# Model usage weights for grouped usage calculation
|
| 92 |
+
# When calculating combined usage for quota groups, each model's usage
|
| 93 |
+
# is multiplied by its weight. This accounts for models that consume
|
| 94 |
+
# more quota per request (e.g., Opus uses more than Sonnet).
|
| 95 |
+
# Models not in the map default to weight 1.
|
| 96 |
+
# Example: {"claude-opus-4-5": 2} means Opus usage counts 2x
|
| 97 |
+
model_usage_weights: Dict[str, int] = {}
|
| 98 |
+
|
| 99 |
+
# =========================================================================
|
| 100 |
+
# PRIORITY CONCURRENCY MULTIPLIERS - Override in subclass
|
| 101 |
+
# =========================================================================
|
| 102 |
+
|
| 103 |
+
# Priority-based concurrency multipliers (universal, applies to all modes)
|
| 104 |
+
# Maps priority level -> multiplier
|
| 105 |
+
# Higher priority credentials (lower number) can have higher multipliers
|
| 106 |
+
# to allow more concurrent requests
|
| 107 |
+
# Example: {1: 5, 2: 3} means Priority 1 gets 5x, Priority 2 gets 3x
|
| 108 |
+
default_priority_multipliers: Dict[int, int] = {}
|
| 109 |
+
|
| 110 |
+
# Fallback multiplier for sequential mode when priority not in default_priority_multipliers
|
| 111 |
+
# This is used for lower-priority tiers in sequential mode to maintain some stickiness
|
| 112 |
+
# Default: 1 (no multiplier effect)
|
| 113 |
+
default_sequential_fallback_multiplier: int = 1
|
| 114 |
+
|
| 115 |
@abstractmethod
|
| 116 |
async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
|
| 117 |
"""
|
|
|
|
| 181 |
pass
|
| 182 |
|
| 183 |
# [NEW] Credential Prioritization System
|
| 184 |
+
|
| 185 |
+
# =========================================================================
|
| 186 |
+
# TIER RESOLUTION LOGIC (Centralized)
|
| 187 |
+
# =========================================================================
|
| 188 |
+
|
| 189 |
+
def _resolve_tier_priority(self, tier_name: Optional[str]) -> int:
|
| 190 |
+
"""
|
| 191 |
+
Resolve priority for a tier name using provider's tier_priorities mapping.
|
| 192 |
+
|
| 193 |
+
Args:
|
| 194 |
+
tier_name: The tier name string (e.g., "free-tier", "standard-tier")
|
| 195 |
+
|
| 196 |
+
Returns:
|
| 197 |
+
Priority level from tier_priorities, or default_tier_priority if
|
| 198 |
+
tier_name is None or not found in the mapping.
|
| 199 |
+
"""
|
| 200 |
+
if tier_name is None:
|
| 201 |
+
return self.default_tier_priority
|
| 202 |
+
return self.tier_priorities.get(tier_name, self.default_tier_priority)
|
| 203 |
+
|
| 204 |
def get_credential_priority(self, credential: str) -> Optional[int]:
|
| 205 |
"""
|
| 206 |
Returns the priority level for a credential.
|
| 207 |
Lower numbers = higher priority (1 is highest).
|
| 208 |
+
Returns None if tier not yet discovered.
|
| 209 |
|
| 210 |
+
Uses the provider's tier_priorities mapping to resolve priority from
|
| 211 |
+
tier name. Unknown tiers fall back to default_tier_priority.
|
| 212 |
+
|
| 213 |
+
Subclasses should:
|
| 214 |
+
1. Define tier_priorities dict with all known tier names
|
| 215 |
+
2. Override get_credential_tier_name() for tier lookup
|
| 216 |
+
Do NOT override this method.
|
| 217 |
|
| 218 |
Args:
|
| 219 |
credential: The credential identifier (API key or path)
|
| 220 |
|
| 221 |
Returns:
|
| 222 |
+
Priority level (1-10) or None if tier not yet discovered
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
"""
|
| 224 |
+
tier = self.get_credential_tier_name(credential)
|
| 225 |
+
if tier is None:
|
| 226 |
+
return None # Tier not yet discovered
|
| 227 |
+
return self._resolve_tier_priority(tier)
|
| 228 |
|
| 229 |
def get_model_tier_requirement(self, model: str) -> Optional[int]:
|
| 230 |
"""
|
|
|
|
| 275 |
Tier name string (e.g., "free-tier", "paid-tier") or None if unknown
|
| 276 |
"""
|
| 277 |
return None
|
| 278 |
+
|
| 279 |
+
# =========================================================================
|
| 280 |
+
# Sequential Rotation Support
|
| 281 |
+
# =========================================================================
|
| 282 |
+
|
| 283 |
+
@classmethod
|
| 284 |
+
def get_rotation_mode(cls, provider_name: str) -> str:
|
| 285 |
+
"""
|
| 286 |
+
Get the rotation mode for this provider.
|
| 287 |
+
|
| 288 |
+
Checks ROTATION_MODE_{PROVIDER} environment variable first,
|
| 289 |
+
then falls back to the class's default_rotation_mode.
|
| 290 |
+
|
| 291 |
+
Args:
|
| 292 |
+
provider_name: The provider name (e.g., "antigravity", "gemini_cli")
|
| 293 |
+
|
| 294 |
+
Returns:
|
| 295 |
+
"balanced" or "sequential"
|
| 296 |
+
"""
|
| 297 |
+
env_key = f"ROTATION_MODE_{provider_name.upper()}"
|
| 298 |
+
return os.getenv(env_key, cls.default_rotation_mode)
|
| 299 |
+
|
| 300 |
+
@staticmethod
|
| 301 |
+
def parse_quota_error(
|
| 302 |
+
error: Exception, error_body: Optional[str] = None
|
| 303 |
+
) -> Optional[Dict[str, Any]]:
|
| 304 |
+
"""
|
| 305 |
+
Parse a quota/rate-limit error and extract structured information.
|
| 306 |
+
|
| 307 |
+
Providers should override this method to handle their specific error formats.
|
| 308 |
+
This allows the error_handler to use provider-specific parsing when available,
|
| 309 |
+
falling back to generic parsing otherwise.
|
| 310 |
+
|
| 311 |
+
Args:
|
| 312 |
+
error: The caught exception
|
| 313 |
+
error_body: Optional raw response body string
|
| 314 |
+
|
| 315 |
+
Returns:
|
| 316 |
+
None if not a parseable quota error, otherwise:
|
| 317 |
+
{
|
| 318 |
+
"retry_after": int, # seconds until quota resets
|
| 319 |
+
"reason": str, # e.g., "QUOTA_EXHAUSTED", "RATE_LIMITED"
|
| 320 |
+
"reset_timestamp": str | None, # ISO timestamp if available
|
| 321 |
+
"quota_reset_timestamp": float | None, # Unix timestamp for quota reset
|
| 322 |
+
}
|
| 323 |
+
"""
|
| 324 |
+
return None # Default: no provider-specific parsing
|
| 325 |
+
|
| 326 |
+
# =========================================================================
|
| 327 |
+
# Per-Provider Usage Tracking Configuration
|
| 328 |
+
# =========================================================================
|
| 329 |
+
|
| 330 |
+
# =========================================================================
|
| 331 |
+
# USAGE RESET CONFIG LOGIC (Centralized)
|
| 332 |
+
# =========================================================================
|
| 333 |
+
|
| 334 |
+
def _find_usage_config_for_priority(
|
| 335 |
+
self, priority: int
|
| 336 |
+
) -> Optional[UsageResetConfigDef]:
|
| 337 |
+
"""
|
| 338 |
+
Find usage config that applies to a priority value.
|
| 339 |
+
|
| 340 |
+
Checks frozenset keys first (priority must be in the set),
|
| 341 |
+
then falls back to "default" key if no match found.
|
| 342 |
+
|
| 343 |
+
Args:
|
| 344 |
+
priority: The credential priority level
|
| 345 |
+
|
| 346 |
+
Returns:
|
| 347 |
+
UsageResetConfigDef if found, None otherwise
|
| 348 |
+
"""
|
| 349 |
+
# First, check frozenset keys for explicit priority match
|
| 350 |
+
for key, config in self.usage_reset_configs.items():
|
| 351 |
+
if isinstance(key, frozenset) and priority in key:
|
| 352 |
+
return config
|
| 353 |
+
|
| 354 |
+
# Fall back to "default" key
|
| 355 |
+
return self.usage_reset_configs.get("default")
|
| 356 |
+
|
| 357 |
+
def _build_usage_reset_config(
|
| 358 |
+
self, tier_name: Optional[str]
|
| 359 |
+
) -> Optional[Dict[str, Any]]:
|
| 360 |
+
"""
|
| 361 |
+
Build usage reset configuration dict for a tier.
|
| 362 |
+
|
| 363 |
+
Resolves tier to priority, then finds matching usage config.
|
| 364 |
+
Returns None if provider doesn't define usage_reset_configs.
|
| 365 |
+
|
| 366 |
+
Args:
|
| 367 |
+
tier_name: The tier name string
|
| 368 |
+
|
| 369 |
+
Returns:
|
| 370 |
+
Usage config dict with window_seconds, mode, priority, description,
|
| 371 |
+
field_name, or None if no config applies
|
| 372 |
+
"""
|
| 373 |
+
if not self.usage_reset_configs:
|
| 374 |
+
return None
|
| 375 |
+
|
| 376 |
+
priority = self._resolve_tier_priority(tier_name)
|
| 377 |
+
config = self._find_usage_config_for_priority(priority)
|
| 378 |
+
|
| 379 |
+
if config is None:
|
| 380 |
+
return None
|
| 381 |
+
|
| 382 |
+
return {
|
| 383 |
+
"window_seconds": config.window_seconds,
|
| 384 |
+
"mode": config.mode,
|
| 385 |
+
"priority": priority,
|
| 386 |
+
"description": config.description,
|
| 387 |
+
"field_name": config.field_name,
|
| 388 |
+
}
|
| 389 |
+
|
| 390 |
+
def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
|
| 391 |
+
"""
|
| 392 |
+
Get provider-specific usage tracking configuration for a credential.
|
| 393 |
+
|
| 394 |
+
Uses the provider's usage_reset_configs class attribute to build
|
| 395 |
+
the configuration dict. Priority is auto-derived from tier.
|
| 396 |
+
|
| 397 |
+
Subclasses should define usage_reset_configs as a class attribute
|
| 398 |
+
instead of overriding this method. Only override get_credential_tier_name()
|
| 399 |
+
to provide the tier lookup mechanism.
|
| 400 |
+
|
| 401 |
+
The UsageManager will use this configuration to:
|
| 402 |
+
1. Track usage per-model or per-credential based on mode
|
| 403 |
+
2. Reset usage based on a rolling window OR quota exhausted timestamp
|
| 404 |
+
3. Archive stats to "global" when the window/quota expires
|
| 405 |
+
|
| 406 |
+
Args:
|
| 407 |
+
credential: The credential identifier (API key or path)
|
| 408 |
+
|
| 409 |
+
Returns:
|
| 410 |
+
None to use default daily reset, otherwise a dict with:
|
| 411 |
+
{
|
| 412 |
+
"window_seconds": int, # Duration in seconds (e.g., 18000 for 5h)
|
| 413 |
+
"mode": str, # "credential" or "per_model"
|
| 414 |
+
"priority": int, # Priority level (auto-derived from tier)
|
| 415 |
+
"description": str, # Human-readable description (for logging)
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
Modes:
|
| 419 |
+
- "credential": One window per credential. Window starts from first
|
| 420 |
+
request of ANY model. All models reset together when window expires.
|
| 421 |
+
- "per_model": Separate window per model (or model group). Window starts
|
| 422 |
+
from first request of THAT model. Models reset independently unless
|
| 423 |
+
grouped. If a quota_exhausted error provides exact reset time, that
|
| 424 |
+
becomes the authoritative reset time for the model.
|
| 425 |
+
"""
|
| 426 |
+
tier = self.get_credential_tier_name(credential)
|
| 427 |
+
return self._build_usage_reset_config(tier)
|
| 428 |
+
|
| 429 |
+
def get_default_usage_field_name(self) -> str:
|
| 430 |
+
"""
|
| 431 |
+
Get the default usage tracking field name for this provider.
|
| 432 |
+
|
| 433 |
+
Providers can override this to use a custom field name for usage tracking
|
| 434 |
+
when no credential-specific config is available.
|
| 435 |
+
|
| 436 |
+
Returns:
|
| 437 |
+
Field name string (default: "daily")
|
| 438 |
+
"""
|
| 439 |
+
return "daily"
|
| 440 |
+
|
| 441 |
+
# =========================================================================
|
| 442 |
+
# Model Quota Grouping
|
| 443 |
+
# =========================================================================
|
| 444 |
+
|
| 445 |
+
# =========================================================================
|
| 446 |
+
# QUOTA GROUPS LOGIC (Centralized)
|
| 447 |
+
# =========================================================================
|
| 448 |
+
|
| 449 |
+
def _get_effective_quota_groups(self) -> QuotaGroupMap:
|
| 450 |
+
"""
|
| 451 |
+
Get quota groups with .env overrides applied.
|
| 452 |
+
|
| 453 |
+
Env format: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
|
| 454 |
+
Set empty string to disable a default group.
|
| 455 |
+
"""
|
| 456 |
+
if not self.provider_env_name or not self.model_quota_groups:
|
| 457 |
+
return self.model_quota_groups
|
| 458 |
+
|
| 459 |
+
result: QuotaGroupMap = {}
|
| 460 |
+
|
| 461 |
+
for group_name, default_models in self.model_quota_groups.items():
|
| 462 |
+
env_key = (
|
| 463 |
+
f"QUOTA_GROUPS_{self.provider_env_name.upper()}_{group_name.upper()}"
|
| 464 |
+
)
|
| 465 |
+
env_value = os.getenv(env_key)
|
| 466 |
+
|
| 467 |
+
if env_value is not None:
|
| 468 |
+
# Env override present
|
| 469 |
+
if env_value.strip():
|
| 470 |
+
# Parse comma-separated models
|
| 471 |
+
result[group_name] = [
|
| 472 |
+
m.strip() for m in env_value.split(",") if m.strip()
|
| 473 |
+
]
|
| 474 |
+
# Empty string = group disabled, don't add to result
|
| 475 |
+
else:
|
| 476 |
+
# Use default
|
| 477 |
+
result[group_name] = list(default_models)
|
| 478 |
+
|
| 479 |
+
return result
|
| 480 |
+
|
| 481 |
+
def _find_model_quota_group(self, model: str) -> Optional[str]:
|
| 482 |
+
"""Find which quota group a model belongs to."""
|
| 483 |
+
groups = self._get_effective_quota_groups()
|
| 484 |
+
for group_name, models in groups.items():
|
| 485 |
+
if model in models:
|
| 486 |
+
return group_name
|
| 487 |
+
return None
|
| 488 |
+
|
| 489 |
+
def _get_quota_group_models(self, group: str) -> List[str]:
|
| 490 |
+
"""Get all models in a quota group."""
|
| 491 |
+
groups = self._get_effective_quota_groups()
|
| 492 |
+
return groups.get(group, [])
|
| 493 |
+
|
| 494 |
+
def get_model_quota_group(self, model: str) -> Optional[str]:
|
| 495 |
+
"""
|
| 496 |
+
Returns the quota group name for a model, or None if not grouped.
|
| 497 |
+
|
| 498 |
+
Uses the provider's model_quota_groups class attribute with .env overrides
|
| 499 |
+
via QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2".
|
| 500 |
+
|
| 501 |
+
Models in the same quota group share cooldown timing - when one model
|
| 502 |
+
hits a quota exhausted error, all models in the group get the same
|
| 503 |
+
reset timestamp. They also reset (archive stats) together.
|
| 504 |
+
|
| 505 |
+
Subclasses should define model_quota_groups as a class attribute
|
| 506 |
+
instead of overriding this method.
|
| 507 |
+
|
| 508 |
+
Args:
|
| 509 |
+
model: Model name (with or without provider prefix)
|
| 510 |
+
|
| 511 |
+
Returns:
|
| 512 |
+
Group name string (e.g., "claude") or None if model is not grouped
|
| 513 |
+
"""
|
| 514 |
+
# Strip provider prefix if present
|
| 515 |
+
clean_model = model.split("/")[-1] if "/" in model else model
|
| 516 |
+
return self._find_model_quota_group(clean_model)
|
| 517 |
+
|
| 518 |
+
def get_models_in_quota_group(self, group: str) -> List[str]:
|
| 519 |
+
"""
|
| 520 |
+
Returns all model names that belong to a quota group.
|
| 521 |
+
|
| 522 |
+
Uses the provider's model_quota_groups class attribute with .env overrides.
|
| 523 |
+
|
| 524 |
+
Args:
|
| 525 |
+
group: Group name (e.g., "claude")
|
| 526 |
+
|
| 527 |
+
Returns:
|
| 528 |
+
List of model names (WITHOUT provider prefix) in the group.
|
| 529 |
+
Empty list if group doesn't exist.
|
| 530 |
+
"""
|
| 531 |
+
return self._get_quota_group_models(group)
|
| 532 |
+
|
| 533 |
+
def get_model_usage_weight(self, model: str) -> int:
|
| 534 |
+
"""
|
| 535 |
+
Returns the usage weight for a model when calculating grouped usage.
|
| 536 |
+
|
| 537 |
+
Models with higher weights contribute more to the combined group usage.
|
| 538 |
+
This accounts for models that consume more quota per request.
|
| 539 |
+
|
| 540 |
+
Args:
|
| 541 |
+
model: Model name (with or without provider prefix)
|
| 542 |
+
|
| 543 |
+
Returns:
|
| 544 |
+
Weight multiplier (default 1 if not configured)
|
| 545 |
+
"""
|
| 546 |
+
# Strip provider prefix if present
|
| 547 |
+
clean_model = model.split("/")[-1] if "/" in model else model
|
| 548 |
+
return self.model_usage_weights.get(clean_model, 1)
|
src/rotator_library/usage_manager.py
CHANGED
|
@@ -5,7 +5,7 @@ import logging
|
|
| 5 |
import asyncio
|
| 6 |
import random
|
| 7 |
from datetime import date, datetime, timezone, time as dt_time
|
| 8 |
-
from typing import Any, Dict, List, Optional, Set
|
| 9 |
import aiofiles
|
| 10 |
import litellm
|
| 11 |
|
|
@@ -42,6 +42,10 @@ class UsageManager:
|
|
| 42 |
|
| 43 |
This ensures lower-usage credentials are preferred while tolerance controls how much
|
| 44 |
randomness is introduced into the selection process.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
"""
|
| 46 |
|
| 47 |
def __init__(
|
|
@@ -49,6 +53,13 @@ class UsageManager:
|
|
| 49 |
file_path: str = "key_usage.json",
|
| 50 |
daily_reset_time_utc: Optional[str] = "03:00",
|
| 51 |
rotation_tolerance: float = 0.0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
):
|
| 53 |
"""
|
| 54 |
Initialize the UsageManager.
|
|
@@ -60,9 +71,28 @@ class UsageManager:
|
|
| 60 |
- 0.0: Deterministic, least-used credential always selected
|
| 61 |
- tolerance = 2.0 - 4.0 (default, recommended): Balanced randomness, can pick credentials within 2 uses of max
|
| 62 |
- 5.0+: High randomness, more unpredictable selection patterns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
"""
|
| 64 |
self.file_path = file_path
|
| 65 |
self.rotation_tolerance = rotation_tolerance
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
self.key_states: Dict[str, Dict[str, Any]] = {}
|
| 67 |
|
| 68 |
self._data_lock = asyncio.Lock()
|
|
@@ -81,6 +111,426 @@ class UsageManager:
|
|
| 81 |
else:
|
| 82 |
self.daily_reset_time_utc = None
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
async def _lazy_init(self):
|
| 85 |
"""Initializes the usage data by loading it from the file asynchronously."""
|
| 86 |
async with self._init_lock:
|
|
@@ -107,85 +557,412 @@ class UsageManager:
|
|
| 107 |
if self._usage_data is None:
|
| 108 |
return
|
| 109 |
async with self._data_lock:
|
|
|
|
|
|
|
| 110 |
async with aiofiles.open(self.file_path, "w") as f:
|
| 111 |
await f.write(json.dumps(self._usage_data, indent=2))
|
| 112 |
|
| 113 |
async def _reset_daily_stats_if_needed(self):
|
| 114 |
-
"""
|
| 115 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
return
|
| 117 |
|
| 118 |
now_utc = datetime.now(timezone.utc)
|
|
|
|
| 119 |
today_str = now_utc.date().isoformat()
|
| 120 |
needs_saving = False
|
| 121 |
|
| 122 |
for key, data in self._usage_data.items():
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
if last_reset_str != today_str:
|
| 126 |
-
last_reset_dt = None
|
| 127 |
-
if last_reset_str:
|
| 128 |
-
# Ensure the parsed datetime is timezone-aware (UTC)
|
| 129 |
-
last_reset_dt = datetime.fromisoformat(last_reset_str).replace(
|
| 130 |
-
tzinfo=timezone.utc
|
| 131 |
-
)
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
now_utc.date(), self.daily_reset_time_utc
|
| 136 |
-
)
|
| 137 |
|
| 138 |
-
if
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
lib_logger.debug(
|
| 143 |
-
f"Performing daily reset for key {mask_credential(key)}"
|
| 144 |
)
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
# Archive global stats from the previous day's 'daily'
|
| 156 |
-
daily_data = data.get("daily", {})
|
| 157 |
-
if daily_data:
|
| 158 |
-
global_data = data.setdefault("global", {"models": {}})
|
| 159 |
-
for model, stats in daily_data.get("models", {}).items():
|
| 160 |
-
global_model_stats = global_data["models"].setdefault(
|
| 161 |
-
model,
|
| 162 |
-
{
|
| 163 |
-
"success_count": 0,
|
| 164 |
-
"prompt_tokens": 0,
|
| 165 |
-
"completion_tokens": 0,
|
| 166 |
-
"approx_cost": 0.0,
|
| 167 |
-
},
|
| 168 |
-
)
|
| 169 |
-
global_model_stats["success_count"] += stats.get(
|
| 170 |
-
"success_count", 0
|
| 171 |
-
)
|
| 172 |
-
global_model_stats["prompt_tokens"] += stats.get(
|
| 173 |
-
"prompt_tokens", 0
|
| 174 |
-
)
|
| 175 |
-
global_model_stats["completion_tokens"] += stats.get(
|
| 176 |
-
"completion_tokens", 0
|
| 177 |
-
)
|
| 178 |
-
global_model_stats["approx_cost"] += stats.get(
|
| 179 |
-
"approx_cost", 0.0
|
| 180 |
-
)
|
| 181 |
-
|
| 182 |
-
# Reset daily stats
|
| 183 |
-
data["daily"] = {"date": today_str, "models": {}}
|
| 184 |
-
data["last_daily_reset"] = today_str
|
| 185 |
|
| 186 |
if needs_saving:
|
| 187 |
await self._save_usage()
|
| 188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
def _initialize_key_states(self, keys: List[str]):
|
| 190 |
"""Initializes state tracking for all provided keys if not already present."""
|
| 191 |
for key in keys:
|
|
@@ -306,12 +1083,8 @@ class UsageManager:
|
|
| 306 |
priority = credential_priorities.get(key, 999)
|
| 307 |
|
| 308 |
# Get usage count for load balancing within priority groups
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
.get("models", {})
|
| 312 |
-
.get(model, {})
|
| 313 |
-
.get("success_count", 0)
|
| 314 |
-
)
|
| 315 |
|
| 316 |
# Group by priority
|
| 317 |
if priority not in priority_groups:
|
|
@@ -324,6 +1097,16 @@ class UsageManager:
|
|
| 324 |
for priority_level in sorted_priorities:
|
| 325 |
keys_in_priority = priority_groups[priority_level]
|
| 326 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
# Within each priority group, use existing tier1/tier2 logic
|
| 328 |
tier1_keys, tier2_keys = [], []
|
| 329 |
for key, usage_count in keys_in_priority:
|
|
@@ -333,18 +1116,27 @@ class UsageManager:
|
|
| 333 |
if not key_state["models_in_use"]:
|
| 334 |
tier1_keys.append((key, usage_count))
|
| 335 |
# Tier 2: Keys that can accept more concurrent requests
|
| 336 |
-
elif
|
|
|
|
|
|
|
|
|
|
| 337 |
tier2_keys.append((key, usage_count))
|
| 338 |
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
if tier1_keys:
|
| 349 |
selected_key = self._select_weighted_random(
|
| 350 |
tier1_keys, self.rotation_tolerance
|
|
@@ -361,6 +1153,7 @@ class UsageManager:
|
|
| 361 |
]
|
| 362 |
else:
|
| 363 |
# Deterministic: sort by usage within each tier
|
|
|
|
| 364 |
tier1_keys.sort(key=lambda x: x[1])
|
| 365 |
tier2_keys.sort(key=lambda x: x[1])
|
| 366 |
|
|
@@ -386,7 +1179,7 @@ class UsageManager:
|
|
| 386 |
state = self.key_states[key]
|
| 387 |
async with state["lock"]:
|
| 388 |
current_count = state["models_in_use"].get(model, 0)
|
| 389 |
-
if current_count <
|
| 390 |
state["models_in_use"][model] = current_count + 1
|
| 391 |
tier_name = (
|
| 392 |
credential_tier_names.get(key, "unknown")
|
|
@@ -395,7 +1188,7 @@ class UsageManager:
|
|
| 395 |
)
|
| 396 |
lib_logger.info(
|
| 397 |
f"Acquired key {mask_credential(key)} for model {model} "
|
| 398 |
-
f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{
|
| 399 |
)
|
| 400 |
return key
|
| 401 |
|
|
@@ -424,6 +1217,19 @@ class UsageManager:
|
|
| 424 |
|
| 425 |
else:
|
| 426 |
# Original logic when no priorities specified
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
tier1_keys, tier2_keys = [], []
|
| 428 |
|
| 429 |
# First, filter the list of available keys to exclude any on cooldown.
|
|
@@ -437,28 +1243,35 @@ class UsageManager:
|
|
| 437 |
continue
|
| 438 |
|
| 439 |
# Prioritize keys based on their current usage to ensure load balancing.
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
.get("models", {})
|
| 443 |
-
.get(model, {})
|
| 444 |
-
.get("success_count", 0)
|
| 445 |
-
)
|
| 446 |
key_state = self.key_states[key]
|
| 447 |
|
| 448 |
# Tier 1: Completely idle keys (preferred).
|
| 449 |
if not key_state["models_in_use"]:
|
| 450 |
tier1_keys.append((key, usage_count))
|
| 451 |
# Tier 2: Keys that can accept more concurrent requests for this model.
|
| 452 |
-
elif
|
|
|
|
|
|
|
|
|
|
| 453 |
tier2_keys.append((key, usage_count))
|
| 454 |
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
if tier1_keys:
|
| 463 |
selected_key = self._select_weighted_random(
|
| 464 |
tier1_keys, self.rotation_tolerance
|
|
@@ -475,6 +1288,7 @@ class UsageManager:
|
|
| 475 |
]
|
| 476 |
else:
|
| 477 |
# Deterministic: sort by usage within each tier
|
|
|
|
| 478 |
tier1_keys.sort(key=lambda x: x[1])
|
| 479 |
tier2_keys.sort(key=lambda x: x[1])
|
| 480 |
|
|
@@ -501,7 +1315,7 @@ class UsageManager:
|
|
| 501 |
state = self.key_states[key]
|
| 502 |
async with state["lock"]:
|
| 503 |
current_count = state["models_in_use"].get(model, 0)
|
| 504 |
-
if current_count <
|
| 505 |
state["models_in_use"][model] = current_count + 1
|
| 506 |
tier_name = (
|
| 507 |
credential_tier_names.get(key)
|
|
@@ -511,7 +1325,7 @@ class UsageManager:
|
|
| 511 |
tier_info = f"tier: {tier_name}, " if tier_name else ""
|
| 512 |
lib_logger.info(
|
| 513 |
f"Acquired key {mask_credential(key)} for model {model} "
|
| 514 |
-
f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{
|
| 515 |
)
|
| 516 |
return key
|
| 517 |
|
|
@@ -585,70 +1399,131 @@ class UsageManager:
|
|
| 585 |
"""
|
| 586 |
Records a successful API call, resetting failure counters.
|
| 587 |
It safely handles cases where token usage data is not available.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 588 |
"""
|
| 589 |
await self._lazy_init()
|
| 590 |
async with self._data_lock:
|
|
|
|
| 591 |
today_utc_str = datetime.now(timezone.utc).date().isoformat()
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
"global": {"models": {}},
|
| 597 |
-
"model_cooldowns": {},
|
| 598 |
-
"failures": {},
|
| 599 |
-
},
|
| 600 |
)
|
| 601 |
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
key_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
|
| 606 |
-
#
|
| 607 |
model_failures = key_data.setdefault("failures", {}).setdefault(model, {})
|
| 608 |
model_failures["consecutive_failures"] = 0
|
|
|
|
|
|
|
| 609 |
if model in key_data.get("model_cooldowns", {}):
|
| 610 |
del key_data["model_cooldowns"][model]
|
| 611 |
|
| 612 |
-
|
| 613 |
-
model,
|
| 614 |
-
{
|
| 615 |
-
"success_count": 0,
|
| 616 |
-
"prompt_tokens": 0,
|
| 617 |
-
"completion_tokens": 0,
|
| 618 |
-
"approx_cost": 0.0,
|
| 619 |
-
},
|
| 620 |
-
)
|
| 621 |
-
daily_model_data["success_count"] += 1
|
| 622 |
-
|
| 623 |
-
# Safely attempt to record token and cost usage
|
| 624 |
if (
|
| 625 |
completion_response
|
| 626 |
and hasattr(completion_response, "usage")
|
| 627 |
and completion_response.usage
|
| 628 |
):
|
| 629 |
usage = completion_response.usage
|
| 630 |
-
|
| 631 |
-
|
| 632 |
usage, "completion_tokens", 0
|
| 633 |
-
)
|
| 634 |
lib_logger.info(
|
| 635 |
f"Recorded usage from response object for key {mask_credential(key)}"
|
| 636 |
)
|
| 637 |
try:
|
| 638 |
provider_name = model.split("/")[0]
|
| 639 |
-
|
| 640 |
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
provider_plugin, "skip_cost_calculation", False
|
| 644 |
):
|
| 645 |
lib_logger.debug(
|
| 646 |
f"Skipping cost calculation for provider '{provider_name}' (custom provider)."
|
| 647 |
)
|
| 648 |
else:
|
| 649 |
-
# Differentiate cost calculation based on response type
|
| 650 |
if isinstance(completion_response, litellm.EmbeddingResponse):
|
| 651 |
-
# Manually calculate cost for embeddings
|
| 652 |
model_info = litellm.get_model_info(model)
|
| 653 |
input_cost = model_info.get("input_cost_per_token")
|
| 654 |
if input_cost:
|
|
@@ -663,7 +1538,7 @@ class UsageManager:
|
|
| 663 |
)
|
| 664 |
|
| 665 |
if cost is not None:
|
| 666 |
-
|
| 667 |
except Exception as e:
|
| 668 |
lib_logger.warning(
|
| 669 |
f"Could not calculate cost for model {model}: {e}"
|
|
@@ -671,14 +1546,13 @@ class UsageManager:
|
|
| 671 |
elif isinstance(completion_response, asyncio.Future) or hasattr(
|
| 672 |
completion_response, "__aiter__"
|
| 673 |
):
|
| 674 |
-
#
|
| 675 |
-
pass
|
| 676 |
else:
|
| 677 |
lib_logger.warning(
|
| 678 |
f"No usage data found in completion response for model {model}. Recording success without token count."
|
| 679 |
)
|
| 680 |
|
| 681 |
-
key_data["last_used_ts"] =
|
| 682 |
|
| 683 |
await self._save_usage()
|
| 684 |
|
|
@@ -689,7 +1563,13 @@ class UsageManager:
|
|
| 689 |
classified_error: ClassifiedError,
|
| 690 |
increment_consecutive_failures: bool = True,
|
| 691 |
):
|
| 692 |
-
"""Records a failure and applies cooldowns based on
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 693 |
|
| 694 |
Args:
|
| 695 |
key: The API key or credential identifier
|
|
@@ -700,17 +1580,36 @@ class UsageManager:
|
|
| 700 |
"""
|
| 701 |
await self._lazy_init()
|
| 702 |
async with self._data_lock:
|
|
|
|
| 703 |
today_utc_str = datetime.now(timezone.utc).date().isoformat()
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
"global": {"models": {}},
|
| 709 |
-
"model_cooldowns": {},
|
| 710 |
-
"failures": {},
|
| 711 |
-
},
|
| 712 |
)
|
| 713 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
# Provider-level errors (transient issues) should not count against the key
|
| 715 |
provider_level_errors = {"server_error", "api_connection"}
|
| 716 |
|
|
@@ -722,22 +1621,94 @@ class UsageManager:
|
|
| 722 |
|
| 723 |
# Calculate cooldown duration based on error type
|
| 724 |
cooldown_seconds = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 725 |
|
| 726 |
-
|
| 727 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 728 |
cooldown_seconds = classified_error.retry_after or 60
|
|
|
|
| 729 |
lib_logger.info(
|
| 730 |
-
f"Rate limit
|
| 731 |
-
f"
|
| 732 |
)
|
|
|
|
| 733 |
elif classified_error.error_type == "authentication":
|
| 734 |
# Apply a 5-minute key-level lockout for auth errors
|
| 735 |
-
key_data["key_cooldown_until"] =
|
|
|
|
|
|
|
| 736 |
lib_logger.warning(
|
| 737 |
f"Authentication error on key {mask_credential(key)}. Applying 5-minute key-level lockout."
|
| 738 |
)
|
| 739 |
-
# Auth errors still use escalating backoff for the specific model
|
| 740 |
-
cooldown_seconds = 300 # 5 minutes for model cooldown
|
| 741 |
|
| 742 |
# If we should increment failures, calculate escalating backoff
|
| 743 |
if should_increment:
|
|
@@ -751,35 +1722,27 @@ class UsageManager:
|
|
| 751 |
# If cooldown wasn't set by specific error type, use escalating backoff
|
| 752 |
if cooldown_seconds is None:
|
| 753 |
backoff_tiers = {1: 10, 2: 30, 3: 60, 4: 120}
|
| 754 |
-
cooldown_seconds = backoff_tiers.get(
|
| 755 |
-
|
| 756 |
-
) # Default to 2 hours for "spent" keys
|
| 757 |
lib_logger.warning(
|
| 758 |
f"Failure #{count} for key {mask_credential(key)} with model {model}. "
|
| 759 |
-
f"Error type: {classified_error.error_type}"
|
| 760 |
)
|
| 761 |
else:
|
| 762 |
# Provider-level errors: apply short cooldown but don't count against key
|
| 763 |
if cooldown_seconds is None:
|
| 764 |
-
cooldown_seconds = 30
|
|
|
|
| 765 |
lib_logger.info(
|
| 766 |
-
f"Provider-level error ({classified_error.error_type}) for key {mask_credential(key)}
|
| 767 |
-
f"NOT incrementing
|
| 768 |
)
|
| 769 |
|
| 770 |
-
# Apply the cooldown
|
| 771 |
-
model_cooldowns = key_data.setdefault("model_cooldowns", {})
|
| 772 |
-
model_cooldowns[model] = time.time() + cooldown_seconds
|
| 773 |
-
lib_logger.warning(
|
| 774 |
-
f"Cooldown applied for key {mask_credential(key)} with model {model}: {cooldown_seconds}s. "
|
| 775 |
-
f"Error type: {classified_error.error_type}"
|
| 776 |
-
)
|
| 777 |
-
|
| 778 |
# Check for key-level lockout condition
|
| 779 |
await self._check_key_lockout(key, key_data)
|
| 780 |
|
| 781 |
key_data["last_failure"] = {
|
| 782 |
-
"timestamp":
|
| 783 |
"model": model,
|
| 784 |
"error": str(classified_error.original_exception),
|
| 785 |
}
|
|
|
|
| 5 |
import asyncio
|
| 6 |
import random
|
| 7 |
from datetime import date, datetime, timezone, time as dt_time
|
| 8 |
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
| 9 |
import aiofiles
|
| 10 |
import litellm
|
| 11 |
|
|
|
|
| 42 |
|
| 43 |
This ensures lower-usage credentials are preferred while tolerance controls how much
|
| 44 |
randomness is introduced into the selection process.
|
| 45 |
+
|
| 46 |
+
Additionally, providers can specify a rotation mode:
|
| 47 |
+
- "balanced" (default): Rotate credentials to distribute load evenly
|
| 48 |
+
- "sequential": Use one credential until exhausted (preserves caching)
|
| 49 |
"""
|
| 50 |
|
| 51 |
def __init__(
|
|
|
|
| 53 |
file_path: str = "key_usage.json",
|
| 54 |
daily_reset_time_utc: Optional[str] = "03:00",
|
| 55 |
rotation_tolerance: float = 0.0,
|
| 56 |
+
provider_rotation_modes: Optional[Dict[str, str]] = None,
|
| 57 |
+
provider_plugins: Optional[Dict[str, Any]] = None,
|
| 58 |
+
priority_multipliers: Optional[Dict[str, Dict[int, int]]] = None,
|
| 59 |
+
priority_multipliers_by_mode: Optional[
|
| 60 |
+
Dict[str, Dict[str, Dict[int, int]]]
|
| 61 |
+
] = None,
|
| 62 |
+
sequential_fallback_multipliers: Optional[Dict[str, int]] = None,
|
| 63 |
):
|
| 64 |
"""
|
| 65 |
Initialize the UsageManager.
|
|
|
|
| 71 |
- 0.0: Deterministic, least-used credential always selected
|
| 72 |
- tolerance = 2.0 - 4.0 (default, recommended): Balanced randomness, can pick credentials within 2 uses of max
|
| 73 |
- 5.0+: High randomness, more unpredictable selection patterns
|
| 74 |
+
provider_rotation_modes: Dict mapping provider names to rotation modes.
|
| 75 |
+
- "balanced": Rotate credentials to distribute load evenly (default)
|
| 76 |
+
- "sequential": Use one credential until exhausted (preserves caching)
|
| 77 |
+
provider_plugins: Dict mapping provider names to provider plugin instances.
|
| 78 |
+
Used for per-provider usage reset configuration (window durations, field names).
|
| 79 |
+
priority_multipliers: Dict mapping provider -> priority -> multiplier.
|
| 80 |
+
Universal multipliers that apply regardless of rotation mode.
|
| 81 |
+
Example: {"antigravity": {1: 5, 2: 3}}
|
| 82 |
+
priority_multipliers_by_mode: Dict mapping provider -> mode -> priority -> multiplier.
|
| 83 |
+
Mode-specific overrides. Example: {"antigravity": {"balanced": {3: 1}}}
|
| 84 |
+
sequential_fallback_multipliers: Dict mapping provider -> fallback multiplier.
|
| 85 |
+
Used in sequential mode when priority not in priority_multipliers.
|
| 86 |
+
Example: {"antigravity": 2}
|
| 87 |
"""
|
| 88 |
self.file_path = file_path
|
| 89 |
self.rotation_tolerance = rotation_tolerance
|
| 90 |
+
self.provider_rotation_modes = provider_rotation_modes or {}
|
| 91 |
+
self.provider_plugins = provider_plugins or PROVIDER_PLUGINS
|
| 92 |
+
self.priority_multipliers = priority_multipliers or {}
|
| 93 |
+
self.priority_multipliers_by_mode = priority_multipliers_by_mode or {}
|
| 94 |
+
self.sequential_fallback_multipliers = sequential_fallback_multipliers or {}
|
| 95 |
+
self._provider_instances: Dict[str, Any] = {} # Cache for provider instances
|
| 96 |
self.key_states: Dict[str, Dict[str, Any]] = {}
|
| 97 |
|
| 98 |
self._data_lock = asyncio.Lock()
|
|
|
|
| 111 |
else:
|
| 112 |
self.daily_reset_time_utc = None
|
| 113 |
|
| 114 |
+
def _get_rotation_mode(self, provider: str) -> str:
|
| 115 |
+
"""
|
| 116 |
+
Get the rotation mode for a provider.
|
| 117 |
+
|
| 118 |
+
Args:
|
| 119 |
+
provider: Provider name (e.g., "antigravity", "gemini_cli")
|
| 120 |
+
|
| 121 |
+
Returns:
|
| 122 |
+
"balanced" or "sequential"
|
| 123 |
+
"""
|
| 124 |
+
return self.provider_rotation_modes.get(provider, "balanced")
|
| 125 |
+
|
| 126 |
+
def _get_priority_multiplier(
|
| 127 |
+
self, provider: str, priority: int, rotation_mode: str
|
| 128 |
+
) -> int:
|
| 129 |
+
"""
|
| 130 |
+
Get the concurrency multiplier for a provider/priority/mode combination.
|
| 131 |
+
|
| 132 |
+
Lookup order:
|
| 133 |
+
1. Mode-specific tier override: priority_multipliers_by_mode[provider][mode][priority]
|
| 134 |
+
2. Universal tier multiplier: priority_multipliers[provider][priority]
|
| 135 |
+
3. Sequential fallback (if mode is sequential): sequential_fallback_multipliers[provider]
|
| 136 |
+
4. Global default: 1 (no multiplier effect)
|
| 137 |
+
|
| 138 |
+
Args:
|
| 139 |
+
provider: Provider name (e.g., "antigravity")
|
| 140 |
+
priority: Priority level (1 = highest priority)
|
| 141 |
+
rotation_mode: Current rotation mode ("sequential" or "balanced")
|
| 142 |
+
|
| 143 |
+
Returns:
|
| 144 |
+
Multiplier value
|
| 145 |
+
"""
|
| 146 |
+
provider_lower = provider.lower()
|
| 147 |
+
|
| 148 |
+
# 1. Check mode-specific override
|
| 149 |
+
if provider_lower in self.priority_multipliers_by_mode:
|
| 150 |
+
mode_multipliers = self.priority_multipliers_by_mode[provider_lower]
|
| 151 |
+
if rotation_mode in mode_multipliers:
|
| 152 |
+
if priority in mode_multipliers[rotation_mode]:
|
| 153 |
+
return mode_multipliers[rotation_mode][priority]
|
| 154 |
+
|
| 155 |
+
# 2. Check universal tier multiplier
|
| 156 |
+
if provider_lower in self.priority_multipliers:
|
| 157 |
+
if priority in self.priority_multipliers[provider_lower]:
|
| 158 |
+
return self.priority_multipliers[provider_lower][priority]
|
| 159 |
+
|
| 160 |
+
# 3. Sequential fallback (only for sequential mode)
|
| 161 |
+
if rotation_mode == "sequential":
|
| 162 |
+
if provider_lower in self.sequential_fallback_multipliers:
|
| 163 |
+
return self.sequential_fallback_multipliers[provider_lower]
|
| 164 |
+
|
| 165 |
+
# 4. Global default
|
| 166 |
+
return 1
|
| 167 |
+
|
| 168 |
+
def _get_provider_from_credential(self, credential: str) -> Optional[str]:
|
| 169 |
+
"""
|
| 170 |
+
Extract provider name from credential path or identifier.
|
| 171 |
+
|
| 172 |
+
Supports multiple credential formats:
|
| 173 |
+
- OAuth: "oauth_creds/antigravity_oauth_15.json" -> "antigravity"
|
| 174 |
+
- OAuth: "C:\\...\\oauth_creds\\gemini_cli_oauth_1.json" -> "gemini_cli"
|
| 175 |
+
- API key style: stored with provider prefix metadata
|
| 176 |
+
|
| 177 |
+
Args:
|
| 178 |
+
credential: The credential identifier (path or key)
|
| 179 |
+
|
| 180 |
+
Returns:
|
| 181 |
+
Provider name string or None if cannot be determined
|
| 182 |
+
"""
|
| 183 |
+
import re
|
| 184 |
+
|
| 185 |
+
# Normalize path separators
|
| 186 |
+
normalized = credential.replace("\\", "/")
|
| 187 |
+
|
| 188 |
+
# Pattern: {provider}_oauth_{number}.json
|
| 189 |
+
match = re.search(r"/([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
|
| 190 |
+
if match:
|
| 191 |
+
return match.group(1).lower()
|
| 192 |
+
|
| 193 |
+
# Pattern: oauth_creds/{provider}_...
|
| 194 |
+
match = re.search(r"oauth_creds/([a-z_]+)_", normalized, re.IGNORECASE)
|
| 195 |
+
if match:
|
| 196 |
+
return match.group(1).lower()
|
| 197 |
+
|
| 198 |
+
return None
|
| 199 |
+
|
| 200 |
+
def _get_provider_instance(self, provider: str) -> Optional[Any]:
|
| 201 |
+
"""
|
| 202 |
+
Get or create a provider plugin instance.
|
| 203 |
+
|
| 204 |
+
Args:
|
| 205 |
+
provider: The provider name
|
| 206 |
+
|
| 207 |
+
Returns:
|
| 208 |
+
Provider plugin instance or None
|
| 209 |
+
"""
|
| 210 |
+
if not provider:
|
| 211 |
+
return None
|
| 212 |
+
|
| 213 |
+
plugin_class = self.provider_plugins.get(provider)
|
| 214 |
+
if not plugin_class:
|
| 215 |
+
return None
|
| 216 |
+
|
| 217 |
+
# Get or create provider instance from cache
|
| 218 |
+
if provider not in self._provider_instances:
|
| 219 |
+
# Instantiate the plugin if it's a class, or use it directly if already an instance
|
| 220 |
+
if isinstance(plugin_class, type):
|
| 221 |
+
self._provider_instances[provider] = plugin_class()
|
| 222 |
+
else:
|
| 223 |
+
self._provider_instances[provider] = plugin_class
|
| 224 |
+
|
| 225 |
+
return self._provider_instances[provider]
|
| 226 |
+
|
| 227 |
+
def _get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
|
| 228 |
+
"""
|
| 229 |
+
Get the usage reset configuration for a credential from its provider plugin.
|
| 230 |
+
|
| 231 |
+
Args:
|
| 232 |
+
credential: The credential identifier
|
| 233 |
+
|
| 234 |
+
Returns:
|
| 235 |
+
Configuration dict with window_seconds, field_name, etc.
|
| 236 |
+
or None to use default daily reset.
|
| 237 |
+
"""
|
| 238 |
+
provider = self._get_provider_from_credential(credential)
|
| 239 |
+
plugin_instance = self._get_provider_instance(provider)
|
| 240 |
+
|
| 241 |
+
if plugin_instance and hasattr(plugin_instance, "get_usage_reset_config"):
|
| 242 |
+
return plugin_instance.get_usage_reset_config(credential)
|
| 243 |
+
|
| 244 |
+
return None
|
| 245 |
+
|
| 246 |
+
def _get_reset_mode(self, credential: str) -> str:
|
| 247 |
+
"""
|
| 248 |
+
Get the reset mode for a credential: 'credential' or 'per_model'.
|
| 249 |
+
|
| 250 |
+
Args:
|
| 251 |
+
credential: The credential identifier
|
| 252 |
+
|
| 253 |
+
Returns:
|
| 254 |
+
"per_model" or "credential" (default)
|
| 255 |
+
"""
|
| 256 |
+
config = self._get_usage_reset_config(credential)
|
| 257 |
+
return config.get("mode", "credential") if config else "credential"
|
| 258 |
+
|
| 259 |
+
def _get_model_quota_group(self, credential: str, model: str) -> Optional[str]:
|
| 260 |
+
"""
|
| 261 |
+
Get the quota group for a model, if the provider defines one.
|
| 262 |
+
|
| 263 |
+
Args:
|
| 264 |
+
credential: The credential identifier
|
| 265 |
+
model: Model name (with or without provider prefix)
|
| 266 |
+
|
| 267 |
+
Returns:
|
| 268 |
+
Group name (e.g., "claude") or None if not grouped
|
| 269 |
+
"""
|
| 270 |
+
provider = self._get_provider_from_credential(credential)
|
| 271 |
+
plugin_instance = self._get_provider_instance(provider)
|
| 272 |
+
|
| 273 |
+
if plugin_instance and hasattr(plugin_instance, "get_model_quota_group"):
|
| 274 |
+
return plugin_instance.get_model_quota_group(model)
|
| 275 |
+
|
| 276 |
+
return None
|
| 277 |
+
|
| 278 |
+
def _get_grouped_models(self, credential: str, group: str) -> List[str]:
|
| 279 |
+
"""
|
| 280 |
+
Get all model names in a quota group (with provider prefix).
|
| 281 |
+
|
| 282 |
+
Args:
|
| 283 |
+
credential: The credential identifier
|
| 284 |
+
group: Group name (e.g., "claude")
|
| 285 |
+
|
| 286 |
+
Returns:
|
| 287 |
+
List of full model names (e.g., ["antigravity/claude-opus-4-5", ...])
|
| 288 |
+
"""
|
| 289 |
+
provider = self._get_provider_from_credential(credential)
|
| 290 |
+
plugin_instance = self._get_provider_instance(provider)
|
| 291 |
+
|
| 292 |
+
if plugin_instance and hasattr(plugin_instance, "get_models_in_quota_group"):
|
| 293 |
+
models = plugin_instance.get_models_in_quota_group(group)
|
| 294 |
+
# Add provider prefix
|
| 295 |
+
return [f"{provider}/{m}" for m in models]
|
| 296 |
+
|
| 297 |
+
return []
|
| 298 |
+
|
| 299 |
+
def _get_model_usage_weight(self, credential: str, model: str) -> int:
|
| 300 |
+
"""
|
| 301 |
+
Get the usage weight for a model when calculating grouped usage.
|
| 302 |
+
|
| 303 |
+
Args:
|
| 304 |
+
credential: The credential identifier
|
| 305 |
+
model: Model name (with or without provider prefix)
|
| 306 |
+
|
| 307 |
+
Returns:
|
| 308 |
+
Weight multiplier (default 1 if not configured)
|
| 309 |
+
"""
|
| 310 |
+
provider = self._get_provider_from_credential(credential)
|
| 311 |
+
plugin_instance = self._get_provider_instance(provider)
|
| 312 |
+
|
| 313 |
+
if plugin_instance and hasattr(plugin_instance, "get_model_usage_weight"):
|
| 314 |
+
return plugin_instance.get_model_usage_weight(model)
|
| 315 |
+
|
| 316 |
+
return 1
|
| 317 |
+
|
| 318 |
+
def _get_grouped_usage_count(self, key: str, model: str) -> int:
|
| 319 |
+
"""
|
| 320 |
+
Get usage count for credential selection, considering quota groups.
|
| 321 |
+
|
| 322 |
+
If the model belongs to a quota group, returns the weighted combined usage
|
| 323 |
+
across all models in the group. Otherwise returns individual model usage.
|
| 324 |
+
|
| 325 |
+
Weights are applied per-model to account for models that consume more quota
|
| 326 |
+
per request (e.g., Opus might count 2x compared to Sonnet).
|
| 327 |
+
|
| 328 |
+
Args:
|
| 329 |
+
key: Credential identifier
|
| 330 |
+
model: Model name (with provider prefix, e.g., "antigravity/claude-sonnet-4-5")
|
| 331 |
+
|
| 332 |
+
Returns:
|
| 333 |
+
Weighted combined usage if grouped, otherwise individual model usage
|
| 334 |
+
"""
|
| 335 |
+
# Check if model is in a quota group
|
| 336 |
+
group = self._get_model_quota_group(key, model)
|
| 337 |
+
|
| 338 |
+
if group:
|
| 339 |
+
# Get all models in the group
|
| 340 |
+
grouped_models = self._get_grouped_models(key, group)
|
| 341 |
+
|
| 342 |
+
# Sum weighted usage across all models in the group
|
| 343 |
+
total_weighted_usage = 0
|
| 344 |
+
for grouped_model in grouped_models:
|
| 345 |
+
usage = self._get_usage_count(key, grouped_model)
|
| 346 |
+
weight = self._get_model_usage_weight(key, grouped_model)
|
| 347 |
+
total_weighted_usage += usage * weight
|
| 348 |
+
return total_weighted_usage
|
| 349 |
+
|
| 350 |
+
# Not grouped - return individual model usage (no weight applied)
|
| 351 |
+
return self._get_usage_count(key, model)
|
| 352 |
+
|
| 353 |
+
def _get_usage_field_name(self, credential: str) -> str:
|
| 354 |
+
"""
|
| 355 |
+
Get the usage tracking field name for a credential.
|
| 356 |
+
|
| 357 |
+
Returns the provider-specific field name if configured,
|
| 358 |
+
otherwise falls back to "daily".
|
| 359 |
+
|
| 360 |
+
Args:
|
| 361 |
+
credential: The credential identifier
|
| 362 |
+
|
| 363 |
+
Returns:
|
| 364 |
+
Field name string (e.g., "5h_window", "weekly", "daily")
|
| 365 |
+
"""
|
| 366 |
+
config = self._get_usage_reset_config(credential)
|
| 367 |
+
if config and "field_name" in config:
|
| 368 |
+
return config["field_name"]
|
| 369 |
+
|
| 370 |
+
# Check provider default
|
| 371 |
+
provider = self._get_provider_from_credential(credential)
|
| 372 |
+
plugin_instance = self._get_provider_instance(provider)
|
| 373 |
+
|
| 374 |
+
if plugin_instance and hasattr(plugin_instance, "get_default_usage_field_name"):
|
| 375 |
+
return plugin_instance.get_default_usage_field_name()
|
| 376 |
+
|
| 377 |
+
return "daily"
|
| 378 |
+
|
| 379 |
+
def _get_usage_count(self, key: str, model: str) -> int:
|
| 380 |
+
"""
|
| 381 |
+
Get the current usage count for a model from the appropriate usage structure.
|
| 382 |
+
|
| 383 |
+
Supports both:
|
| 384 |
+
- New per-model structure: {"models": {"model_name": {"success_count": N, ...}}}
|
| 385 |
+
- Legacy structure: {"daily": {"models": {"model_name": {"success_count": N, ...}}}}
|
| 386 |
+
|
| 387 |
+
Args:
|
| 388 |
+
key: Credential identifier
|
| 389 |
+
model: Model name
|
| 390 |
+
|
| 391 |
+
Returns:
|
| 392 |
+
Usage count (success_count) for the model in the current window/period
|
| 393 |
+
"""
|
| 394 |
+
if self._usage_data is None:
|
| 395 |
+
return 0
|
| 396 |
+
|
| 397 |
+
key_data = self._usage_data.get(key, {})
|
| 398 |
+
reset_mode = self._get_reset_mode(key)
|
| 399 |
+
|
| 400 |
+
if reset_mode == "per_model":
|
| 401 |
+
# New per-model structure: key_data["models"][model]["success_count"]
|
| 402 |
+
return key_data.get("models", {}).get(model, {}).get("success_count", 0)
|
| 403 |
+
else:
|
| 404 |
+
# Legacy structure: key_data["daily"]["models"][model]["success_count"]
|
| 405 |
+
return (
|
| 406 |
+
key_data.get("daily", {})
|
| 407 |
+
.get("models", {})
|
| 408 |
+
.get(model, {})
|
| 409 |
+
.get("success_count", 0)
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
+
# =========================================================================
|
| 413 |
+
# TIMESTAMP FORMATTING HELPERS
|
| 414 |
+
# =========================================================================
|
| 415 |
+
|
| 416 |
+
def _format_timestamp_local(self, ts: Optional[float]) -> Optional[str]:
|
| 417 |
+
"""
|
| 418 |
+
Format Unix timestamp as local time string with timezone offset.
|
| 419 |
+
|
| 420 |
+
Args:
|
| 421 |
+
ts: Unix timestamp or None
|
| 422 |
+
|
| 423 |
+
Returns:
|
| 424 |
+
Formatted string like "2025-12-07 14:30:17 +0100" or None
|
| 425 |
+
"""
|
| 426 |
+
if ts is None:
|
| 427 |
+
return None
|
| 428 |
+
try:
|
| 429 |
+
dt = datetime.fromtimestamp(ts).astimezone() # Local timezone
|
| 430 |
+
# Use UTC offset for conciseness (works on all platforms)
|
| 431 |
+
return dt.strftime("%Y-%m-%d %H:%M:%S %z")
|
| 432 |
+
except (OSError, ValueError, OverflowError):
|
| 433 |
+
return None
|
| 434 |
+
|
| 435 |
+
def _add_readable_timestamps(self, data: Dict) -> Dict:
|
| 436 |
+
"""
|
| 437 |
+
Add human-readable timestamp fields to usage data before saving.
|
| 438 |
+
|
| 439 |
+
Adds 'window_started' and 'quota_resets' fields derived from
|
| 440 |
+
Unix timestamps for easier debugging and monitoring.
|
| 441 |
+
|
| 442 |
+
Args:
|
| 443 |
+
data: The usage data dict to enhance
|
| 444 |
+
|
| 445 |
+
Returns:
|
| 446 |
+
The same dict with readable timestamp fields added
|
| 447 |
+
"""
|
| 448 |
+
for key, key_data in data.items():
|
| 449 |
+
# Handle per-model structure
|
| 450 |
+
models = key_data.get("models", {})
|
| 451 |
+
for model_name, model_stats in models.items():
|
| 452 |
+
if not isinstance(model_stats, dict):
|
| 453 |
+
continue
|
| 454 |
+
|
| 455 |
+
# Add readable window start time
|
| 456 |
+
window_start = model_stats.get("window_start_ts")
|
| 457 |
+
if window_start:
|
| 458 |
+
model_stats["window_started"] = self._format_timestamp_local(
|
| 459 |
+
window_start
|
| 460 |
+
)
|
| 461 |
+
elif "window_started" in model_stats:
|
| 462 |
+
del model_stats["window_started"]
|
| 463 |
+
|
| 464 |
+
# Add readable reset time
|
| 465 |
+
quota_reset = model_stats.get("quota_reset_ts")
|
| 466 |
+
if quota_reset:
|
| 467 |
+
model_stats["quota_resets"] = self._format_timestamp_local(
|
| 468 |
+
quota_reset
|
| 469 |
+
)
|
| 470 |
+
elif "quota_resets" in model_stats:
|
| 471 |
+
del model_stats["quota_resets"]
|
| 472 |
+
|
| 473 |
+
return data
|
| 474 |
+
|
| 475 |
+
def _sort_sequential(
    self,
    candidates: List[Tuple[str, int]],
    credential_priorities: Optional[Dict[str, int]] = None,
) -> List[Tuple[str, int]]:
    """
    Order credentials for sequential rotation with position retention.

    Keeps actively-used credentials at the front so they remain primary
    until exhausted. Ranking, most significant first:
      1. priority tier (ascending; missing -> 999)
      2. usage count (descending; heavier use = more established)
      3. last_used_ts (descending; recency breaks ties for stickiness)
      4. credential id (ascending; stable final tiebreaker)

    Args:
        candidates: List of (credential_id, usage_count) tuples
        credential_priorities: Optional dict mapping credentials to priority levels

    Returns:
        Sorted list of candidates (same format as input)
    """
    if not candidates:
        return []
    if len(candidates) == 1:
        return candidates

    priorities = credential_priorities or {}
    usage_map = self._usage_data or {}

    def rank(entry: Tuple[str, int]) -> Tuple[int, int, float, str]:
        cred_id, count = entry
        recency = usage_map.get(cred_id, {}).get("last_used_ts", 0)
        # Negate count/recency so "higher" sorts first under ascending sort.
        return (priorities.get(cred_id, 999), -count, -recency, cred_id)

    ordered = sorted(candidates, key=rank)

    # Debug logging - show top 3 credentials in ordering
    if lib_logger.isEnabledFor(logging.DEBUG):
        preview = " → ".join(
            f"{mask_credential(c)}(p={priorities.get(c, 999) if credential_priorities else 'N/A'}, u={u})"
            for c, u in ordered[:3]
        )
        lib_logger.debug(f"Sequential ordering: {preview}")

    return ordered
|
| 533 |
+
|
| 534 |
async def _lazy_init(self):
|
| 535 |
"""Initializes the usage data by loading it from the file asynchronously."""
|
| 536 |
async with self._init_lock:
|
|
|
|
| 557 |
if self._usage_data is None:
|
| 558 |
return
|
| 559 |
async with self._data_lock:
|
| 560 |
+
# Add human-readable timestamp fields before saving
|
| 561 |
+
self._add_readable_timestamps(self._usage_data)
|
| 562 |
async with aiofiles.open(self.file_path, "w") as f:
|
| 563 |
await f.write(json.dumps(self._usage_data, indent=2))
|
| 564 |
|
| 565 |
async def _reset_daily_stats_if_needed(self):
    """
    Checks if usage stats need to be reset for any key.

    Supports three reset modes:
    1. per_model: Each model has its own window, resets based on quota_reset_ts or fallback window
    2. credential: One window per credential (legacy with custom window duration)
    3. daily: Legacy daily reset at daily_reset_time_utc

    Saves the usage file once at the end if any credential was modified.
    """
    if self._usage_data is None:
        return

    now_utc = datetime.now(timezone.utc)
    now_ts = time.time()
    today_str = now_utc.date().isoformat()
    needs_saving = False

    for key, data in self._usage_data.items():
        # Provider-specific reset configuration; None falls through to
        # the legacy daily behavior below.
        reset_config = self._get_usage_reset_config(key)

        if reset_config:
            reset_mode = reset_config.get("mode", "credential")

            if reset_mode == "per_model":
                # Per-model window reset
                needs_saving |= await self._check_per_model_resets(
                    key, data, reset_config, now_ts
                )
            else:
                # Credential-level window reset (legacy)
                needs_saving |= await self._check_window_reset(
                    key, data, reset_config, now_ts
                )
        elif self.daily_reset_time_utc:
            # Legacy daily reset
            needs_saving |= await self._check_daily_reset(
                key, data, now_utc, today_str, now_ts
            )

    if needs_saving:
        await self._save_usage()
|
| 606 |
|
| 607 |
+
async def _check_per_model_resets(
    self,
    key: str,
    data: Dict[str, Any],
    reset_config: Dict[str, Any],
    now_ts: float,
) -> bool:
    """
    Check and perform per-model resets for a credential.

    Each model resets independently based on:
    1. quota_reset_ts (authoritative, from quota exhausted error) if set
    2. window_start_ts + window_seconds (fallback) otherwise

    Grouped models reset together - all models in a group must be ready.

    Args:
        key: Credential identifier
        data: Usage data for this credential
        reset_config: Provider's reset configuration
        now_ts: Current timestamp

    Returns:
        True if data was modified and needs saving
    """
    window_seconds = reset_config.get("window_seconds", 86400)
    models_data = data.get("models", {})

    if not models_data:
        return False

    modified = False
    # Each quota group is evaluated at most once, regardless of how many
    # of its members appear in models_data.
    processed_groups = set()

    # list() snapshot: _reset_model_data mutates entries while we iterate.
    for model, model_data in list(models_data.items()):
        # Check if this model is in a quota group
        group = self._get_model_quota_group(key, model)

        if group:
            if group in processed_groups:
                continue  # Already handled this group

            # Check if entire group should reset
            if self._should_group_reset(
                key, group, models_data, window_seconds, now_ts
            ):
                # Archive and reset all models in group
                grouped_models = self._get_grouped_models(key, group)
                archived_count = 0

                for grouped_model in grouped_models:
                    if grouped_model in models_data:
                        gm_data = models_data[grouped_model]
                        self._archive_model_to_global(data, grouped_model, gm_data)
                        self._reset_model_data(gm_data)
                        archived_count += 1

                if archived_count > 0:
                    lib_logger.info(
                        f"Reset model group '{group}' ({archived_count} models) for {mask_credential(key)}"
                    )
                    modified = True

            # Mark the group as visited even when it was not ready, so the
            # remaining members are not re-checked this pass.
            processed_groups.add(group)

        else:
            # Ungrouped model - check individually
            if self._should_model_reset(model_data, window_seconds, now_ts):
                self._archive_model_to_global(data, model, model_data)
                self._reset_model_data(model_data)
                lib_logger.info(f"Reset model {model} for {mask_credential(key)}")
                modified = True

    # Preserve unexpired cooldowns
    if modified:
        self._preserve_unexpired_cooldowns(key, data, now_ts)
        # Any reset also clears the consecutive-failure counters.
        if "failures" in data:
            data["failures"] = {}

    return modified
|
| 687 |
+
|
| 688 |
+
def _should_model_reset(
|
| 689 |
+
self, model_data: Dict[str, Any], window_seconds: int, now_ts: float
|
| 690 |
+
) -> bool:
|
| 691 |
+
"""
|
| 692 |
+
Check if a single model should reset.
|
| 693 |
+
|
| 694 |
+
Returns True if:
|
| 695 |
+
- quota_reset_ts is set AND now >= quota_reset_ts, OR
|
| 696 |
+
- quota_reset_ts is NOT set AND now >= window_start_ts + window_seconds
|
| 697 |
+
"""
|
| 698 |
+
quota_reset = model_data.get("quota_reset_ts")
|
| 699 |
+
window_start = model_data.get("window_start_ts")
|
| 700 |
+
|
| 701 |
+
if quota_reset:
|
| 702 |
+
return now_ts >= quota_reset
|
| 703 |
+
elif window_start:
|
| 704 |
+
return now_ts >= window_start + window_seconds
|
| 705 |
+
return False
|
| 706 |
+
|
| 707 |
+
def _should_group_reset(
|
| 708 |
+
self,
|
| 709 |
+
key: str,
|
| 710 |
+
group: str,
|
| 711 |
+
models_data: Dict[str, Dict],
|
| 712 |
+
window_seconds: int,
|
| 713 |
+
now_ts: float,
|
| 714 |
+
) -> bool:
|
| 715 |
+
"""
|
| 716 |
+
Check if all models in a group should reset.
|
| 717 |
+
|
| 718 |
+
All models in the group must be ready to reset.
|
| 719 |
+
If any model has an active cooldown/window, the whole group waits.
|
| 720 |
+
"""
|
| 721 |
+
grouped_models = self._get_grouped_models(key, group)
|
| 722 |
+
|
| 723 |
+
# Track if any model in group has data
|
| 724 |
+
any_has_data = False
|
| 725 |
+
|
| 726 |
+
for grouped_model in grouped_models:
|
| 727 |
+
model_data = models_data.get(grouped_model, {})
|
| 728 |
+
|
| 729 |
+
if not model_data or (
|
| 730 |
+
model_data.get("window_start_ts") is None
|
| 731 |
+
and model_data.get("success_count", 0) == 0
|
| 732 |
+
):
|
| 733 |
+
continue # No stats for this model yet
|
| 734 |
+
|
| 735 |
+
any_has_data = True
|
| 736 |
+
|
| 737 |
+
if not self._should_model_reset(model_data, window_seconds, now_ts):
|
| 738 |
+
return False # At least one model not ready
|
| 739 |
+
|
| 740 |
+
return any_has_data
|
| 741 |
+
|
| 742 |
+
def _archive_model_to_global(
|
| 743 |
+
self, data: Dict[str, Any], model: str, model_data: Dict[str, Any]
|
| 744 |
+
) -> None:
|
| 745 |
+
"""Archive a single model's stats to global."""
|
| 746 |
+
global_data = data.setdefault("global", {"models": {}})
|
| 747 |
+
global_model = global_data["models"].setdefault(
|
| 748 |
+
model,
|
| 749 |
+
{
|
| 750 |
+
"success_count": 0,
|
| 751 |
+
"prompt_tokens": 0,
|
| 752 |
+
"completion_tokens": 0,
|
| 753 |
+
"approx_cost": 0.0,
|
| 754 |
+
},
|
| 755 |
+
)
|
| 756 |
+
|
| 757 |
+
global_model["success_count"] += model_data.get("success_count", 0)
|
| 758 |
+
global_model["prompt_tokens"] += model_data.get("prompt_tokens", 0)
|
| 759 |
+
global_model["completion_tokens"] += model_data.get("completion_tokens", 0)
|
| 760 |
+
global_model["approx_cost"] += model_data.get("approx_cost", 0.0)
|
| 761 |
+
|
| 762 |
+
def _reset_model_data(self, model_data: Dict[str, Any]) -> None:
|
| 763 |
+
"""Reset a model's window and stats."""
|
| 764 |
+
model_data["window_start_ts"] = None
|
| 765 |
+
model_data["quota_reset_ts"] = None
|
| 766 |
+
model_data["success_count"] = 0
|
| 767 |
+
model_data["prompt_tokens"] = 0
|
| 768 |
+
model_data["completion_tokens"] = 0
|
| 769 |
+
model_data["approx_cost"] = 0.0
|
| 770 |
+
|
| 771 |
+
async def _check_window_reset(
    self,
    key: str,
    data: Dict[str, Any],
    reset_config: Dict[str, Any],
    now_ts: float,
) -> bool:
    """
    Check and perform rolling window reset for a credential.

    On reset: archives the window's stats into the credential's global
    totals, keeps unexpired cooldowns, clears failure counters, and leaves
    the next window unstarted until the first request arrives.

    Args:
        key: Credential identifier
        data: Usage data for this credential
        reset_config: Provider's reset configuration
        now_ts: Current timestamp

    Returns:
        True if data was modified and needs saving
    """
    window_seconds = reset_config.get("window_seconds", 86400)  # Default 24h
    field_name = reset_config.get("field_name", "window")
    description = reset_config.get("description", "rolling window")

    # Get current window data
    window_data = data.get(field_name, {})
    window_start = window_data.get("start_ts")

    # No window started yet - nothing to reset
    if window_start is None:
        return False

    # Check if window has expired
    window_end = window_start + window_seconds
    if now_ts < window_end:
        # Window still active
        return False

    # Window expired - perform reset
    hours_elapsed = (now_ts - window_start) / 3600
    lib_logger.info(
        f"Resetting {field_name} for {mask_credential(key)} - "
        f"{description} expired after {hours_elapsed:.1f}h"
    )

    # Archive to global
    self._archive_to_global(data, window_data)

    # Preserve unexpired cooldowns
    self._preserve_unexpired_cooldowns(key, data, now_ts)

    # Reset window stats (but don't start new window until first request)
    data[field_name] = {"start_ts": None, "models": {}}

    # Reset consecutive failures
    if "failures" in data:
        data["failures"] = {}

    return True
|
| 829 |
+
|
| 830 |
+
async def _check_daily_reset(
|
| 831 |
+
self,
|
| 832 |
+
key: str,
|
| 833 |
+
data: Dict[str, Any],
|
| 834 |
+
now_utc: datetime,
|
| 835 |
+
today_str: str,
|
| 836 |
+
now_ts: float,
|
| 837 |
+
) -> bool:
|
| 838 |
+
"""
|
| 839 |
+
Check and perform legacy daily reset for a credential.
|
| 840 |
+
|
| 841 |
+
Args:
|
| 842 |
+
key: Credential identifier
|
| 843 |
+
data: Usage data for this credential
|
| 844 |
+
now_utc: Current datetime in UTC
|
| 845 |
+
today_str: Today's date as ISO string
|
| 846 |
+
now_ts: Current timestamp
|
| 847 |
+
|
| 848 |
+
Returns:
|
| 849 |
+
True if data was modified and needs saving
|
| 850 |
+
"""
|
| 851 |
+
last_reset_str = data.get("last_daily_reset", "")
|
| 852 |
+
|
| 853 |
+
if last_reset_str == today_str:
|
| 854 |
+
return False
|
| 855 |
+
|
| 856 |
+
last_reset_dt = None
|
| 857 |
+
if last_reset_str:
|
| 858 |
+
try:
|
| 859 |
+
last_reset_dt = datetime.fromisoformat(last_reset_str).replace(
|
| 860 |
+
tzinfo=timezone.utc
|
| 861 |
+
)
|
| 862 |
+
except ValueError:
|
| 863 |
+
pass
|
| 864 |
+
|
| 865 |
+
# Determine the reset threshold for today
|
| 866 |
+
reset_threshold_today = datetime.combine(
|
| 867 |
+
now_utc.date(), self.daily_reset_time_utc
|
| 868 |
+
)
|
| 869 |
+
|
| 870 |
+
if not (
|
| 871 |
+
last_reset_dt is None or last_reset_dt < reset_threshold_today <= now_utc
|
| 872 |
+
):
|
| 873 |
+
return False
|
| 874 |
+
|
| 875 |
+
lib_logger.debug(f"Performing daily reset for key {mask_credential(key)}")
|
| 876 |
+
|
| 877 |
+
# Preserve unexpired cooldowns
|
| 878 |
+
self._preserve_unexpired_cooldowns(key, data, now_ts)
|
| 879 |
+
|
| 880 |
+
# Reset consecutive failures
|
| 881 |
+
if "failures" in data:
|
| 882 |
+
data["failures"] = {}
|
| 883 |
+
|
| 884 |
+
# Archive daily stats to global
|
| 885 |
+
daily_data = data.get("daily", {})
|
| 886 |
+
if daily_data:
|
| 887 |
+
self._archive_to_global(data, daily_data)
|
| 888 |
+
|
| 889 |
+
# Reset daily stats
|
| 890 |
+
data["daily"] = {"date": today_str, "models": {}}
|
| 891 |
+
data["last_daily_reset"] = today_str
|
| 892 |
+
|
| 893 |
+
return True
|
| 894 |
+
|
| 895 |
+
def _archive_to_global(
|
| 896 |
+
self, data: Dict[str, Any], source_data: Dict[str, Any]
|
| 897 |
+
) -> None:
|
| 898 |
+
"""
|
| 899 |
+
Archive usage stats from a source field (daily/window) to global.
|
| 900 |
+
|
| 901 |
+
Args:
|
| 902 |
+
data: The credential's usage data
|
| 903 |
+
source_data: The source field data to archive (has "models" key)
|
| 904 |
+
"""
|
| 905 |
+
global_data = data.setdefault("global", {"models": {}})
|
| 906 |
+
for model, stats in source_data.get("models", {}).items():
|
| 907 |
+
global_model_stats = global_data["models"].setdefault(
|
| 908 |
+
model,
|
| 909 |
+
{
|
| 910 |
+
"success_count": 0,
|
| 911 |
+
"prompt_tokens": 0,
|
| 912 |
+
"completion_tokens": 0,
|
| 913 |
+
"approx_cost": 0.0,
|
| 914 |
+
},
|
| 915 |
+
)
|
| 916 |
+
global_model_stats["success_count"] += stats.get("success_count", 0)
|
| 917 |
+
global_model_stats["prompt_tokens"] += stats.get("prompt_tokens", 0)
|
| 918 |
+
global_model_stats["completion_tokens"] += stats.get("completion_tokens", 0)
|
| 919 |
+
global_model_stats["approx_cost"] += stats.get("approx_cost", 0.0)
|
| 920 |
+
|
| 921 |
+
def _preserve_unexpired_cooldowns(
|
| 922 |
+
self, key: str, data: Dict[str, Any], now_ts: float
|
| 923 |
+
) -> None:
|
| 924 |
+
"""
|
| 925 |
+
Preserve unexpired cooldowns during reset (important for long quota cooldowns).
|
| 926 |
+
|
| 927 |
+
Args:
|
| 928 |
+
key: Credential identifier (for logging)
|
| 929 |
+
data: The credential's usage data
|
| 930 |
+
now_ts: Current timestamp
|
| 931 |
+
"""
|
| 932 |
+
# Preserve unexpired model cooldowns
|
| 933 |
+
if "model_cooldowns" in data:
|
| 934 |
+
active_cooldowns = {
|
| 935 |
+
model: end_time
|
| 936 |
+
for model, end_time in data["model_cooldowns"].items()
|
| 937 |
+
if end_time > now_ts
|
| 938 |
+
}
|
| 939 |
+
if active_cooldowns:
|
| 940 |
+
max_remaining = max(
|
| 941 |
+
end_time - now_ts for end_time in active_cooldowns.values()
|
| 942 |
+
)
|
| 943 |
+
hours_remaining = max_remaining / 3600
|
| 944 |
+
lib_logger.info(
|
| 945 |
+
f"Preserving {len(active_cooldowns)} active cooldown(s) "
|
| 946 |
+
f"for key {mask_credential(key)} during reset "
|
| 947 |
+
f"(longest: {hours_remaining:.1f}h remaining)"
|
| 948 |
+
)
|
| 949 |
+
data["model_cooldowns"] = active_cooldowns
|
| 950 |
+
else:
|
| 951 |
+
data["model_cooldowns"] = {}
|
| 952 |
+
|
| 953 |
+
# Preserve unexpired key-level cooldown
|
| 954 |
+
if data.get("key_cooldown_until"):
|
| 955 |
+
if data["key_cooldown_until"] <= now_ts:
|
| 956 |
+
data["key_cooldown_until"] = None
|
| 957 |
+
else:
|
| 958 |
+
hours_remaining = (data["key_cooldown_until"] - now_ts) / 3600
|
| 959 |
+
lib_logger.info(
|
| 960 |
+
f"Preserving key-level cooldown for {mask_credential(key)} "
|
| 961 |
+
f"during reset ({hours_remaining:.1f}h remaining)"
|
| 962 |
+
)
|
| 963 |
+
else:
|
| 964 |
+
data["key_cooldown_until"] = None
|
| 965 |
+
|
| 966 |
def _initialize_key_states(self, keys: List[str]):
|
| 967 |
"""Initializes state tracking for all provided keys if not already present."""
|
| 968 |
for key in keys:
|
|
|
|
| 1083 |
priority = credential_priorities.get(key, 999)
|
| 1084 |
|
| 1085 |
# Get usage count for load balancing within priority groups
|
| 1086 |
+
# Uses grouped usage if model is in a quota group
|
| 1087 |
+
usage_count = self._get_grouped_usage_count(key, model)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1088 |
|
| 1089 |
# Group by priority
|
| 1090 |
if priority not in priority_groups:
|
|
|
|
| 1097 |
for priority_level in sorted_priorities:
|
| 1098 |
keys_in_priority = priority_groups[priority_level]
|
| 1099 |
|
| 1100 |
+
# Determine selection method based on provider's rotation mode
|
| 1101 |
+
provider = model.split("/")[0] if "/" in model else ""
|
| 1102 |
+
rotation_mode = self._get_rotation_mode(provider)
|
| 1103 |
+
|
| 1104 |
+
# Calculate effective concurrency based on priority tier
|
| 1105 |
+
multiplier = self._get_priority_multiplier(
|
| 1106 |
+
provider, priority_level, rotation_mode
|
| 1107 |
+
)
|
| 1108 |
+
effective_max_concurrent = max_concurrent * multiplier
|
| 1109 |
+
|
| 1110 |
# Within each priority group, use existing tier1/tier2 logic
|
| 1111 |
tier1_keys, tier2_keys = [], []
|
| 1112 |
for key, usage_count in keys_in_priority:
|
|
|
|
| 1116 |
if not key_state["models_in_use"]:
|
| 1117 |
tier1_keys.append((key, usage_count))
|
| 1118 |
# Tier 2: Keys that can accept more concurrent requests
|
| 1119 |
+
elif (
|
| 1120 |
+
key_state["models_in_use"].get(model, 0)
|
| 1121 |
+
< effective_max_concurrent
|
| 1122 |
+
):
|
| 1123 |
tier2_keys.append((key, usage_count))
|
| 1124 |
|
| 1125 |
+
if rotation_mode == "sequential":
|
| 1126 |
+
# Sequential mode: sort credentials by priority, usage, recency
|
| 1127 |
+
# Keep all candidates in sorted order (no filtering to single key)
|
| 1128 |
+
selection_method = "sequential"
|
| 1129 |
+
if tier1_keys:
|
| 1130 |
+
tier1_keys = self._sort_sequential(
|
| 1131 |
+
tier1_keys, credential_priorities
|
| 1132 |
+
)
|
| 1133 |
+
if tier2_keys:
|
| 1134 |
+
tier2_keys = self._sort_sequential(
|
| 1135 |
+
tier2_keys, credential_priorities
|
| 1136 |
+
)
|
| 1137 |
+
elif self.rotation_tolerance > 0:
|
| 1138 |
+
# Balanced mode with weighted randomness
|
| 1139 |
+
selection_method = "weighted-random"
|
| 1140 |
if tier1_keys:
|
| 1141 |
selected_key = self._select_weighted_random(
|
| 1142 |
tier1_keys, self.rotation_tolerance
|
|
|
|
| 1153 |
]
|
| 1154 |
else:
|
| 1155 |
# Deterministic: sort by usage within each tier
|
| 1156 |
+
selection_method = "least-used"
|
| 1157 |
tier1_keys.sort(key=lambda x: x[1])
|
| 1158 |
tier2_keys.sort(key=lambda x: x[1])
|
| 1159 |
|
|
|
|
| 1179 |
state = self.key_states[key]
|
| 1180 |
async with state["lock"]:
|
| 1181 |
current_count = state["models_in_use"].get(model, 0)
|
| 1182 |
+
if current_count < effective_max_concurrent:
|
| 1183 |
state["models_in_use"][model] = current_count + 1
|
| 1184 |
tier_name = (
|
| 1185 |
credential_tier_names.get(key, "unknown")
|
|
|
|
| 1188 |
)
|
| 1189 |
lib_logger.info(
|
| 1190 |
f"Acquired key {mask_credential(key)} for model {model} "
|
| 1191 |
+
f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
|
| 1192 |
)
|
| 1193 |
return key
|
| 1194 |
|
|
|
|
| 1217 |
|
| 1218 |
else:
|
| 1219 |
# Original logic when no priorities specified
|
| 1220 |
+
|
| 1221 |
+
# Determine selection method based on provider's rotation mode
|
| 1222 |
+
provider = model.split("/")[0] if "/" in model else ""
|
| 1223 |
+
rotation_mode = self._get_rotation_mode(provider)
|
| 1224 |
+
|
| 1225 |
+
# Calculate effective concurrency for default priority (999)
|
| 1226 |
+
# When no priorities are specified, all credentials get default priority
|
| 1227 |
+
default_priority = 999
|
| 1228 |
+
multiplier = self._get_priority_multiplier(
|
| 1229 |
+
provider, default_priority, rotation_mode
|
| 1230 |
+
)
|
| 1231 |
+
effective_max_concurrent = max_concurrent * multiplier
|
| 1232 |
+
|
| 1233 |
tier1_keys, tier2_keys = [], []
|
| 1234 |
|
| 1235 |
# First, filter the list of available keys to exclude any on cooldown.
|
|
|
|
| 1243 |
continue
|
| 1244 |
|
| 1245 |
# Prioritize keys based on their current usage to ensure load balancing.
|
| 1246 |
+
# Uses grouped usage if model is in a quota group
|
| 1247 |
+
usage_count = self._get_grouped_usage_count(key, model)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1248 |
key_state = self.key_states[key]
|
| 1249 |
|
| 1250 |
# Tier 1: Completely idle keys (preferred).
|
| 1251 |
if not key_state["models_in_use"]:
|
| 1252 |
tier1_keys.append((key, usage_count))
|
| 1253 |
# Tier 2: Keys that can accept more concurrent requests for this model.
|
| 1254 |
+
elif (
|
| 1255 |
+
key_state["models_in_use"].get(model, 0)
|
| 1256 |
+
< effective_max_concurrent
|
| 1257 |
+
):
|
| 1258 |
tier2_keys.append((key, usage_count))
|
| 1259 |
|
| 1260 |
+
if rotation_mode == "sequential":
|
| 1261 |
+
# Sequential mode: sort credentials by priority, usage, recency
|
| 1262 |
+
# Keep all candidates in sorted order (no filtering to single key)
|
| 1263 |
+
selection_method = "sequential"
|
| 1264 |
+
if tier1_keys:
|
| 1265 |
+
tier1_keys = self._sort_sequential(
|
| 1266 |
+
tier1_keys, credential_priorities
|
| 1267 |
+
)
|
| 1268 |
+
if tier2_keys:
|
| 1269 |
+
tier2_keys = self._sort_sequential(
|
| 1270 |
+
tier2_keys, credential_priorities
|
| 1271 |
+
)
|
| 1272 |
+
elif self.rotation_tolerance > 0:
|
| 1273 |
+
# Balanced mode with weighted randomness
|
| 1274 |
+
selection_method = "weighted-random"
|
| 1275 |
if tier1_keys:
|
| 1276 |
selected_key = self._select_weighted_random(
|
| 1277 |
tier1_keys, self.rotation_tolerance
|
|
|
|
| 1288 |
]
|
| 1289 |
else:
|
| 1290 |
# Deterministic: sort by usage within each tier
|
| 1291 |
+
selection_method = "least-used"
|
| 1292 |
tier1_keys.sort(key=lambda x: x[1])
|
| 1293 |
tier2_keys.sort(key=lambda x: x[1])
|
| 1294 |
|
|
|
|
| 1315 |
state = self.key_states[key]
|
| 1316 |
async with state["lock"]:
|
| 1317 |
current_count = state["models_in_use"].get(model, 0)
|
| 1318 |
+
if current_count < effective_max_concurrent:
|
| 1319 |
state["models_in_use"][model] = current_count + 1
|
| 1320 |
tier_name = (
|
| 1321 |
credential_tier_names.get(key)
|
|
|
|
| 1325 |
tier_info = f"tier: {tier_name}, " if tier_name else ""
|
| 1326 |
lib_logger.info(
|
| 1327 |
f"Acquired key {mask_credential(key)} for model {model} "
|
| 1328 |
+
f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
|
| 1329 |
)
|
| 1330 |
return key
|
| 1331 |
|
|
|
|
| 1399 |
"""
|
| 1400 |
Records a successful API call, resetting failure counters.
|
| 1401 |
It safely handles cases where token usage data is not available.
|
| 1402 |
+
|
| 1403 |
+
Supports two modes based on provider configuration:
|
| 1404 |
+
- per_model: Each model has its own window_start_ts and stats in key_data["models"]
|
| 1405 |
+
- credential: Legacy mode with key_data["daily"]["models"]
|
| 1406 |
"""
|
| 1407 |
await self._lazy_init()
|
| 1408 |
async with self._data_lock:
|
| 1409 |
+
now_ts = time.time()
|
| 1410 |
today_utc_str = datetime.now(timezone.utc).date().isoformat()
|
| 1411 |
+
|
| 1412 |
+
reset_config = self._get_usage_reset_config(key)
|
| 1413 |
+
reset_mode = (
|
| 1414 |
+
reset_config.get("mode", "credential") if reset_config else "credential"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1415 |
)
|
| 1416 |
|
| 1417 |
+
if reset_mode == "per_model":
|
| 1418 |
+
# New per-model structure
|
| 1419 |
+
key_data = self._usage_data.setdefault(
|
| 1420 |
+
key,
|
| 1421 |
+
{
|
| 1422 |
+
"models": {},
|
| 1423 |
+
"global": {"models": {}},
|
| 1424 |
+
"model_cooldowns": {},
|
| 1425 |
+
"failures": {},
|
| 1426 |
+
},
|
| 1427 |
+
)
|
| 1428 |
+
|
| 1429 |
+
# Ensure models dict exists
|
| 1430 |
+
if "models" not in key_data:
|
| 1431 |
+
key_data["models"] = {}
|
| 1432 |
+
|
| 1433 |
+
# Get or create per-model data with window tracking
|
| 1434 |
+
model_data = key_data["models"].setdefault(
|
| 1435 |
+
model,
|
| 1436 |
+
{
|
| 1437 |
+
"window_start_ts": None,
|
| 1438 |
+
"quota_reset_ts": None,
|
| 1439 |
+
"success_count": 0,
|
| 1440 |
+
"prompt_tokens": 0,
|
| 1441 |
+
"completion_tokens": 0,
|
| 1442 |
+
"approx_cost": 0.0,
|
| 1443 |
+
},
|
| 1444 |
+
)
|
| 1445 |
+
|
| 1446 |
+
# Start window on first request for this model
|
| 1447 |
+
if model_data.get("window_start_ts") is None:
|
| 1448 |
+
model_data["window_start_ts"] = now_ts
|
| 1449 |
+
|
| 1450 |
+
# Set expected quota reset time from provider config
|
| 1451 |
+
window_seconds = (
|
| 1452 |
+
reset_config.get("window_seconds", 0) if reset_config else 0
|
| 1453 |
+
)
|
| 1454 |
+
if window_seconds > 0:
|
| 1455 |
+
model_data["quota_reset_ts"] = now_ts + window_seconds
|
| 1456 |
+
|
| 1457 |
+
window_hours = window_seconds / 3600 if window_seconds else 0
|
| 1458 |
+
lib_logger.info(
|
| 1459 |
+
f"Started {window_hours:.1f}h window for model {model} on {mask_credential(key)}"
|
| 1460 |
+
)
|
| 1461 |
+
|
| 1462 |
+
# Record stats
|
| 1463 |
+
model_data["success_count"] += 1
|
| 1464 |
+
usage_data_ref = model_data # For token/cost recording below
|
| 1465 |
+
|
| 1466 |
+
else:
|
| 1467 |
+
# Legacy credential-level structure
|
| 1468 |
+
key_data = self._usage_data.setdefault(
|
| 1469 |
+
key,
|
| 1470 |
+
{
|
| 1471 |
+
"daily": {"date": today_utc_str, "models": {}},
|
| 1472 |
+
"global": {"models": {}},
|
| 1473 |
+
"model_cooldowns": {},
|
| 1474 |
+
"failures": {},
|
| 1475 |
+
},
|
| 1476 |
+
)
|
| 1477 |
+
|
| 1478 |
+
if "last_daily_reset" not in key_data:
|
| 1479 |
+
key_data["last_daily_reset"] = today_utc_str
|
| 1480 |
+
|
| 1481 |
+
# Get or create model data in daily structure
|
| 1482 |
+
usage_data_ref = key_data["daily"]["models"].setdefault(
|
| 1483 |
+
model,
|
| 1484 |
+
{
|
| 1485 |
+
"success_count": 0,
|
| 1486 |
+
"prompt_tokens": 0,
|
| 1487 |
+
"completion_tokens": 0,
|
| 1488 |
+
"approx_cost": 0.0,
|
| 1489 |
+
},
|
| 1490 |
+
)
|
| 1491 |
+
usage_data_ref["success_count"] += 1
|
| 1492 |
|
| 1493 |
+
# Reset failures for this model
|
| 1494 |
model_failures = key_data.setdefault("failures", {}).setdefault(model, {})
|
| 1495 |
model_failures["consecutive_failures"] = 0
|
| 1496 |
+
|
| 1497 |
+
# Clear transient cooldown on success (but NOT quota_reset_ts)
|
| 1498 |
if model in key_data.get("model_cooldowns", {}):
|
| 1499 |
del key_data["model_cooldowns"][model]
|
| 1500 |
|
| 1501 |
+
# Record token and cost usage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1502 |
if (
|
| 1503 |
completion_response
|
| 1504 |
and hasattr(completion_response, "usage")
|
| 1505 |
and completion_response.usage
|
| 1506 |
):
|
| 1507 |
usage = completion_response.usage
|
| 1508 |
+
usage_data_ref["prompt_tokens"] += usage.prompt_tokens
|
| 1509 |
+
usage_data_ref["completion_tokens"] += getattr(
|
| 1510 |
usage, "completion_tokens", 0
|
| 1511 |
+
)
|
| 1512 |
lib_logger.info(
|
| 1513 |
f"Recorded usage from response object for key {mask_credential(key)}"
|
| 1514 |
)
|
| 1515 |
try:
|
| 1516 |
provider_name = model.split("/")[0]
|
| 1517 |
+
provider_instance = self._get_provider_instance(provider_name)
|
| 1518 |
|
| 1519 |
+
if provider_instance and getattr(
|
| 1520 |
+
provider_instance, "skip_cost_calculation", False
|
|
|
|
| 1521 |
):
|
| 1522 |
lib_logger.debug(
|
| 1523 |
f"Skipping cost calculation for provider '{provider_name}' (custom provider)."
|
| 1524 |
)
|
| 1525 |
else:
|
|
|
|
| 1526 |
if isinstance(completion_response, litellm.EmbeddingResponse):
|
|
|
|
| 1527 |
model_info = litellm.get_model_info(model)
|
| 1528 |
input_cost = model_info.get("input_cost_per_token")
|
| 1529 |
if input_cost:
|
|
|
|
| 1538 |
)
|
| 1539 |
|
| 1540 |
if cost is not None:
|
| 1541 |
+
usage_data_ref["approx_cost"] += cost
|
| 1542 |
except Exception as e:
|
| 1543 |
lib_logger.warning(
|
| 1544 |
f"Could not calculate cost for model {model}: {e}"
|
|
|
|
| 1546 |
elif isinstance(completion_response, asyncio.Future) or hasattr(
|
| 1547 |
completion_response, "__aiter__"
|
| 1548 |
):
|
| 1549 |
+
pass # Stream - usage recorded from chunks
|
|
|
|
| 1550 |
else:
|
| 1551 |
lib_logger.warning(
|
| 1552 |
f"No usage data found in completion response for model {model}. Recording success without token count."
|
| 1553 |
)
|
| 1554 |
|
| 1555 |
+
key_data["last_used_ts"] = now_ts
|
| 1556 |
|
| 1557 |
await self._save_usage()
|
| 1558 |
|
|
|
|
| 1563 |
classified_error: ClassifiedError,
|
| 1564 |
increment_consecutive_failures: bool = True,
|
| 1565 |
):
|
| 1566 |
+
"""Records a failure and applies cooldowns based on error type.
|
| 1567 |
+
|
| 1568 |
+
Distinguishes between:
|
| 1569 |
+
- quota_exceeded: Long cooldown with exact reset time (from quota_reset_timestamp)
|
| 1570 |
+
Sets quota_reset_ts on model (and group) - this becomes authoritative stats reset time
|
| 1571 |
+
- rate_limit: Short transient cooldown (just wait and retry)
|
| 1572 |
+
Only sets model_cooldowns - does NOT affect stats reset timing
|
| 1573 |
|
| 1574 |
Args:
|
| 1575 |
key: The API key or credential identifier
|
|
|
|
| 1580 |
"""
|
| 1581 |
await self._lazy_init()
|
| 1582 |
async with self._data_lock:
|
| 1583 |
+
now_ts = time.time()
|
| 1584 |
today_utc_str = datetime.now(timezone.utc).date().isoformat()
|
| 1585 |
+
|
| 1586 |
+
reset_config = self._get_usage_reset_config(key)
|
| 1587 |
+
reset_mode = (
|
| 1588 |
+
reset_config.get("mode", "credential") if reset_config else "credential"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1589 |
)
|
| 1590 |
|
| 1591 |
+
# Initialize key data with appropriate structure
|
| 1592 |
+
if reset_mode == "per_model":
|
| 1593 |
+
key_data = self._usage_data.setdefault(
|
| 1594 |
+
key,
|
| 1595 |
+
{
|
| 1596 |
+
"models": {},
|
| 1597 |
+
"global": {"models": {}},
|
| 1598 |
+
"model_cooldowns": {},
|
| 1599 |
+
"failures": {},
|
| 1600 |
+
},
|
| 1601 |
+
)
|
| 1602 |
+
else:
|
| 1603 |
+
key_data = self._usage_data.setdefault(
|
| 1604 |
+
key,
|
| 1605 |
+
{
|
| 1606 |
+
"daily": {"date": today_utc_str, "models": {}},
|
| 1607 |
+
"global": {"models": {}},
|
| 1608 |
+
"model_cooldowns": {},
|
| 1609 |
+
"failures": {},
|
| 1610 |
+
},
|
| 1611 |
+
)
|
| 1612 |
+
|
| 1613 |
# Provider-level errors (transient issues) should not count against the key
|
| 1614 |
provider_level_errors = {"server_error", "api_connection"}
|
| 1615 |
|
|
|
|
| 1621 |
|
| 1622 |
# Calculate cooldown duration based on error type
|
| 1623 |
cooldown_seconds = None
|
| 1624 |
+
model_cooldowns = key_data.setdefault("model_cooldowns", {})
|
| 1625 |
+
|
| 1626 |
+
if classified_error.error_type == "quota_exceeded":
|
| 1627 |
+
# Quota exhausted - use authoritative reset timestamp if available
|
| 1628 |
+
quota_reset_ts = classified_error.quota_reset_timestamp
|
| 1629 |
+
cooldown_seconds = classified_error.retry_after or 60
|
| 1630 |
+
|
| 1631 |
+
if quota_reset_ts and reset_mode == "per_model":
|
| 1632 |
+
# Set quota_reset_ts on model - this becomes authoritative stats reset time
|
| 1633 |
+
models_data = key_data.setdefault("models", {})
|
| 1634 |
+
model_data = models_data.setdefault(
|
| 1635 |
+
model,
|
| 1636 |
+
{
|
| 1637 |
+
"window_start_ts": None,
|
| 1638 |
+
"quota_reset_ts": None,
|
| 1639 |
+
"success_count": 0,
|
| 1640 |
+
"prompt_tokens": 0,
|
| 1641 |
+
"completion_tokens": 0,
|
| 1642 |
+
"approx_cost": 0.0,
|
| 1643 |
+
},
|
| 1644 |
+
)
|
| 1645 |
+
model_data["quota_reset_ts"] = quota_reset_ts
|
| 1646 |
+
|
| 1647 |
+
# Apply to all models in the same quota group
|
| 1648 |
+
group = self._get_model_quota_group(key, model)
|
| 1649 |
+
if group:
|
| 1650 |
+
grouped_models = self._get_grouped_models(key, group)
|
| 1651 |
+
for grouped_model in grouped_models:
|
| 1652 |
+
group_model_data = models_data.setdefault(
|
| 1653 |
+
grouped_model,
|
| 1654 |
+
{
|
| 1655 |
+
"window_start_ts": None,
|
| 1656 |
+
"quota_reset_ts": None,
|
| 1657 |
+
"success_count": 0,
|
| 1658 |
+
"prompt_tokens": 0,
|
| 1659 |
+
"completion_tokens": 0,
|
| 1660 |
+
"approx_cost": 0.0,
|
| 1661 |
+
},
|
| 1662 |
+
)
|
| 1663 |
+
group_model_data["quota_reset_ts"] = quota_reset_ts
|
| 1664 |
+
# Also set transient cooldown for selection logic
|
| 1665 |
+
model_cooldowns[grouped_model] = quota_reset_ts
|
| 1666 |
|
| 1667 |
+
reset_dt = datetime.fromtimestamp(
|
| 1668 |
+
quota_reset_ts, tz=timezone.utc
|
| 1669 |
+
)
|
| 1670 |
+
lib_logger.info(
|
| 1671 |
+
f"Quota exhausted for group '{group}' ({len(grouped_models)} models) "
|
| 1672 |
+
f"on {mask_credential(key)}. Resets at {reset_dt.isoformat()}"
|
| 1673 |
+
)
|
| 1674 |
+
else:
|
| 1675 |
+
reset_dt = datetime.fromtimestamp(
|
| 1676 |
+
quota_reset_ts, tz=timezone.utc
|
| 1677 |
+
)
|
| 1678 |
+
hours = (quota_reset_ts - now_ts) / 3600
|
| 1679 |
+
lib_logger.info(
|
| 1680 |
+
f"Quota exhausted for model {model} on {mask_credential(key)}. "
|
| 1681 |
+
f"Resets at {reset_dt.isoformat()} ({hours:.1f}h)"
|
| 1682 |
+
)
|
| 1683 |
+
|
| 1684 |
+
# Set transient cooldown for selection logic
|
| 1685 |
+
model_cooldowns[model] = quota_reset_ts
|
| 1686 |
+
else:
|
| 1687 |
+
# No authoritative timestamp or legacy mode - just use retry_after
|
| 1688 |
+
model_cooldowns[model] = now_ts + cooldown_seconds
|
| 1689 |
+
hours = cooldown_seconds / 3600
|
| 1690 |
+
lib_logger.info(
|
| 1691 |
+
f"Quota exhausted on {mask_credential(key)} for model {model}. "
|
| 1692 |
+
f"Cooldown: {cooldown_seconds}s ({hours:.1f}h)"
|
| 1693 |
+
)
|
| 1694 |
+
|
| 1695 |
+
elif classified_error.error_type == "rate_limit":
|
| 1696 |
+
# Transient rate limit - just set short cooldown (does NOT set quota_reset_ts)
|
| 1697 |
cooldown_seconds = classified_error.retry_after or 60
|
| 1698 |
+
model_cooldowns[model] = now_ts + cooldown_seconds
|
| 1699 |
lib_logger.info(
|
| 1700 |
+
f"Rate limit on {mask_credential(key)} for model {model}. "
|
| 1701 |
+
f"Transient cooldown: {cooldown_seconds}s"
|
| 1702 |
)
|
| 1703 |
+
|
| 1704 |
elif classified_error.error_type == "authentication":
|
| 1705 |
# Apply a 5-minute key-level lockout for auth errors
|
| 1706 |
+
key_data["key_cooldown_until"] = now_ts + 300
|
| 1707 |
+
cooldown_seconds = 300
|
| 1708 |
+
model_cooldowns[model] = now_ts + cooldown_seconds
|
| 1709 |
lib_logger.warning(
|
| 1710 |
f"Authentication error on key {mask_credential(key)}. Applying 5-minute key-level lockout."
|
| 1711 |
)
|
|
|
|
|
|
|
| 1712 |
|
| 1713 |
# If we should increment failures, calculate escalating backoff
|
| 1714 |
if should_increment:
|
|
|
|
| 1722 |
# If cooldown wasn't set by specific error type, use escalating backoff
|
| 1723 |
if cooldown_seconds is None:
|
| 1724 |
backoff_tiers = {1: 10, 2: 30, 3: 60, 4: 120}
|
| 1725 |
+
cooldown_seconds = backoff_tiers.get(count, 7200)
|
| 1726 |
+
model_cooldowns[model] = now_ts + cooldown_seconds
|
|
|
|
| 1727 |
lib_logger.warning(
|
| 1728 |
f"Failure #{count} for key {mask_credential(key)} with model {model}. "
|
| 1729 |
+
f"Error type: {classified_error.error_type}, cooldown: {cooldown_seconds}s"
|
| 1730 |
)
|
| 1731 |
else:
|
| 1732 |
# Provider-level errors: apply short cooldown but don't count against key
|
| 1733 |
if cooldown_seconds is None:
|
| 1734 |
+
cooldown_seconds = 30
|
| 1735 |
+
model_cooldowns[model] = now_ts + cooldown_seconds
|
| 1736 |
lib_logger.info(
|
| 1737 |
+
f"Provider-level error ({classified_error.error_type}) for key {mask_credential(key)} "
|
| 1738 |
+
f"with model {model}. NOT incrementing failures. Cooldown: {cooldown_seconds}s"
|
| 1739 |
)
|
| 1740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1741 |
# Check for key-level lockout condition
|
| 1742 |
await self._check_key_lockout(key, key_data)
|
| 1743 |
|
| 1744 |
key_data["last_failure"] = {
|
| 1745 |
+
"timestamp": now_ts,
|
| 1746 |
"model": model,
|
| 1747 |
"error": str(classified_error.original_exception),
|
| 1748 |
}
|