Mirrowel committed
Commit b7aa5d6 · unverified · 2 parents: 0af8a39 73a2395

Merge branch 'main' into fix/antigravity-credential-stuck-unavailable

.env.example CHANGED
@@ -159,6 +159,83 @@ MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
 MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
 MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1

+# --- Credential Rotation Mode ---
+# Controls how credentials are rotated when multiple are available for a provider.
+# This affects how the proxy selects the next credential to use for requests.
+#
+# Available modes:
+#   balanced   - (Default) Rotate credentials evenly across requests to distribute load.
+#                Best for API keys with per-minute rate limits.
+#   sequential - Use one credential until it's exhausted (429 error), then switch to next.
+#                Best for credentials with daily/weekly quotas (e.g., free tier accounts).
+#                When a credential hits quota, it's put on cooldown based on the reset time
+#                parsed from the provider's error response.
+#
+# Format: ROTATION_MODE_<PROVIDER_NAME>=<mode>
+#
+# Provider Defaults:
+#   - antigravity: sequential (free tier accounts with daily quotas)
+#   - All others: balanced
+#
+# Example:
+#   ROTATION_MODE_GEMINI=sequential      # Use Gemini keys until quota exhausted
+#   ROTATION_MODE_OPENAI=balanced        # Distribute load across OpenAI keys (default)
+#   ROTATION_MODE_ANTIGRAVITY=balanced   # Override Antigravity's sequential default
+#
+# ROTATION_MODE_GEMINI=balanced
+# ROTATION_MODE_ANTIGRAVITY=sequential
+
+# --- Priority-Based Concurrency Multipliers ---
+# Credentials can be assigned to priority tiers (1=highest, 2, 3, etc.).
+# Each tier can have a concurrency multiplier that increases the effective
+# concurrent request limit for credentials in that tier.
+#
+# How it works:
+#   effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY * tier_multiplier
+#
+# This allows paid/premium credentials to handle more concurrent requests than
+# free tier credentials, regardless of rotation mode.
+#
+# Provider Defaults (built into provider classes):
+#   Antigravity:
+#     Priority 1: 5x (paid ultra tier)
+#     Priority 2: 3x (standard paid tier)
+#     Priority 3+: 2x (sequential mode) or 1x (balanced mode)
+#   Gemini CLI:
+#     Priority 1: 5x
+#     Priority 2: 3x
+#   Others: 1x (all modes)
+#
+# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
+#
+# Mode-specific overrides (optional):
+# Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
+#
+# Examples:
+#   CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10          # Override P1 to 10x
+#   CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1           # Override P3 to 1x
+#   CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1  # P2 = 1x in balanced mode only
+
+# --- Model Quota Groups ---
+# Models that share quota/cooldown timing. When one model in a group hits
+# quota exhausted (429), all models in the group receive the same cooldown timestamp.
+# They also reset (archive stats) together when the quota period expires.
+#
+# This is useful for providers where multiple model variants share the same
+# underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
+#
+# Format: QUOTA_GROUPS_<PROVIDER>_<GROUP>="model1,model2,model3"
+#
+# To DISABLE a default group, set it to an empty string:
+#   QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
+#
+# Default groups:
+#   ANTIGRAVITY.CLAUDE: claude-sonnet-4-5,claude-opus-4-5
+#
+# Examples:
+#   QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
+#   QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
+
 # ------------------------------------------------------------------------------
 # |                      [ADVANCED] Proxy Configuration                         |
 # ------------------------------------------------------------------------------
DOCUMENTATION.md CHANGED
@@ -96,22 +96,30 @@ The `_safe_streaming_wrapper` is a critical component for stability. It:

 ### 2.2. `usage_manager.py` - Stateful Concurrency & Usage Management

- This class is the stateful core of the library, managing concurrency, usage tracking, and cooldowns.
+ This class is the stateful core of the library, managing concurrency, usage tracking, cooldowns, and quota resets.

 #### Key Concepts

 * **Async-Native & Lazy-Loaded**: Fully asynchronous, using `aiofiles` for non-blocking file I/O. Usage data is loaded only when needed.
 * **Fine-Grained Locking**: Each API key has its own `asyncio.Lock` and `asyncio.Condition`. This allows for highly granular control.
+ * **Multiple Reset Modes**: Supports three reset strategies:
+   - **per_model**: Each model has an independent usage window with an authoritative `quota_reset_ts` (from provider errors)
+   - **credential**: One window per credential with a custom duration (e.g., 5 hours, 7 days)
+   - **daily**: Legacy daily reset at `daily_reset_time_utc`
+ * **Model Quota Groups**: Models can be grouped to share quota limits. When one model in a group hits quota, all receive the same reset timestamp.

 #### Tiered Key Acquisition Strategy

 The `acquire_key` method uses a sophisticated strategy to balance load:

 1. **Filtering**: Keys currently on cooldown (global or model-specific) are excluded.
- 2. **Tiering**: Valid keys are split into two tiers:
+ 2. **Rotation Mode**: Determines credential selection strategy:
+    * **Balanced Mode** (default): Credentials sorted by usage count - least-used first for even distribution
+    * **Sequential Mode**: Credentials sorted by usage count descending - most-used first to maintain sticky behavior until exhausted
+ 3. **Tiering**: Valid keys are split into two tiers:
    * **Tier 1 (Ideal)**: Keys that are completely idle (0 concurrent requests).
    * **Tier 2 (Acceptable)**: Keys that are busy but still under their configured `MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER>` limit for the requested model. This allows a single key to be used multiple times for the same model, maximizing throughput.
- 3. **Selection Strategy** (configurable via `rotation_tolerance`):
+ 4. **Selection Strategy** (configurable via `rotation_tolerance`):
    * **Deterministic (tolerance=0.0)**: Within each tier, keys are sorted by daily usage count and the least-used key is always selected. This provides perfect load balance but predictable patterns.
    * **Weighted Random (tolerance>0, default)**: Keys are selected randomly with weights biased toward less-used ones:
      - Formula: `weight = (max_usage - credential_usage) + tolerance + 1`
@@ -119,14 +127,19 @@ The `acquire_key` method uses a sophisticated strategy to balance load:
      - `tolerance=5.0+`: High randomness - even heavily-used credentials have significant probability
      - **Security Benefit**: Unpredictable selection patterns make rate limit detection and fingerprinting harder
      - **Load Balance**: Lower-usage credentials still preferred, maintaining reasonable distribution
- 4. **Concurrency Limits**: Checks against `max_concurrent` limits to prevent overloading a single key.
- 5. **Priority Groups**: When credential prioritization is enabled, higher-tier credentials (lower priority numbers) are tried first before moving to lower tiers.
+ 5. **Concurrency Limits**: Checks against `max_concurrent` limits (with priority multipliers applied) to prevent overloading a single key.
+ 6. **Priority Groups**: When credential prioritization is enabled, higher-tier credentials (lower priority numbers) are tried first before moving to lower tiers.
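The weighted-random selection described above can be sketched in a few lines of Python. This is an illustration of the documented formula only, not the library's actual `acquire_key` code; the credential names and usage counts are invented:

```python
import random

def pick_credential(usage_by_cred: dict, tolerance: float, rng: random.Random) -> str:
    """Select a credential using weight = (max_usage - credential_usage) + tolerance + 1."""
    if tolerance == 0.0:
        # Deterministic mode: always the least-used credential.
        return min(usage_by_cred, key=usage_by_cred.get)
    max_usage = max(usage_by_cred.values())
    creds = list(usage_by_cred)
    weights = [(max_usage - usage_by_cred[c]) + tolerance + 1 for c in creds]
    return rng.choices(creds, weights=weights, k=1)[0]

usage = {"key_a": 10, "key_b": 2, "key_c": 7}
rng = random.Random(42)
picks = [pick_credential(usage, tolerance=1.0, rng=rng) for _ in range(1000)]
# key_b gets weight (10-2)+1+1 = 10 vs key_a's (10-10)+1+1 = 2, so it is picked far more often.
```

A higher `tolerance` flattens the weights, which is exactly the unpredictability-versus-balance trade-off described above.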

 #### Failure Handling & Cooldowns

 * **Escalating Backoff**: When a failure occurs, the key gets a temporary cooldown for that specific model. Consecutive failures increase this time (10s -> 30s -> 60s -> 120s).
 * **Key-Level Lockouts**: If a key accumulates failures across multiple distinct models (3+), it is assumed to be dead/revoked and placed on a global 5-minute lockout.
 * **Authentication Errors**: Immediate 5-minute global lockout.
+ * **Quota Exhausted Errors**: When a provider returns a quota exhausted error with an authoritative reset timestamp:
+   - The `quota_reset_ts` is extracted from the error response (via the provider's `parse_quota_error()` method)
+   - Applied to the affected model (and all models in its quota group if defined)
+   - Cooldown preserved even during daily/window resets until the actual quota reset time
+ - Logs show the exact reset time in local timezone with ISO format
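The escalating per-model backoff above can be sketched as follows. This is a toy illustration of the documented 10s -> 30s -> 60s -> 120s ladder; the real cooldown bookkeeping lives in `usage_manager.py`:

```python
BACKOFF_SCHEDULE = [10, 30, 60, 120]  # seconds: 10s -> 30s -> 60s -> 120s

def cooldown_seconds(consecutive_failures: int) -> int:
    """Return the per-model cooldown for the Nth consecutive failure (1-based), capped at 120s."""
    index = min(consecutive_failures, len(BACKOFF_SCHEDULE)) - 1
    return BACKOFF_SCHEDULE[index]
```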

 ### 2.3. `batch_manager.py` - Efficient Request Aggregation

@@ -406,6 +419,10 @@ The most sophisticated provider implementation, supporting Google's internal Ant
 - **Thought Signature Caching**: Server-side caching of encrypted signatures for multi-turn Gemini 3 conversations
 - **Model-Specific Logic**: Automatic configuration based on model type (Gemini 3, Claude Sonnet, Claude Opus)
 - **Credential Prioritization**: Automatic tier detection with paid credentials prioritized over free (paid tier resets every 5 hours, free tier resets weekly)
+ - **Sequential Rotation Mode**: Default rotation mode is sequential (use credentials until exhausted) to maximize thought signature cache hits
+ - **Per-Model Quota Tracking**: Each model tracks independent usage windows with authoritative reset timestamps from quota errors
+ - **Quota Groups**: Claude models (Sonnet 4.5 + Opus 4.5) can be grouped to share quota limits (disabled by default, configurable via `QUOTA_GROUPS_ANTIGRAVITY_CLAUDE`)
+ - **Priority Multipliers**: Paid tier credentials get higher concurrency limits (Priority 1: 5x, Priority 2: 3x, Priority 3+: 2x in sequential mode)

 #### Model Support

 
@@ -585,6 +602,221 @@ cache/

 ---

+ ### 2.13. Sequential Rotation & Per-Model Quota Tracking
+
+ A comprehensive credential rotation and quota management system introduced in PR #31.
+
+ #### Rotation Modes
+
+ Two rotation strategies are available per provider:
+
+ **Balanced Mode (Default)**:
+ - Distributes load evenly across all credentials
+ - Least-used credentials selected first
+ - Best for providers with per-minute rate limits
+ - Prevents any single credential from being overused
+
+ **Sequential Mode**:
+ - Uses one credential until it's exhausted (429 quota error)
+ - Switches to next credential only after current one fails
+ - Most-used credentials selected first (sticky behavior)
+ - Best for providers with daily/weekly quotas
+ - Maximizes cache hit rates (e.g., Antigravity thought signatures)
+ - Default for Antigravity provider
+
+ **Configuration**:
+ ```env
+ # Set per provider
+ ROTATION_MODE_GEMINI=sequential
+ ROTATION_MODE_OPENAI=balanced
+ ROTATION_MODE_ANTIGRAVITY=balanced  # Override default
+ ```
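The difference between the two modes boils down to sort order. A minimal sketch (illustrative only; the account names and usage counts are made up, and the real selection also applies tiering and weighting on top of this ordering):

```python
def order_credentials(usage_by_cred: dict, mode: str) -> list:
    """Order candidate credentials according to the rotation mode.

    balanced   -> least-used first (even distribution)
    sequential -> most-used first (sticky until exhausted)
    """
    if mode not in ("balanced", "sequential"):
        raise ValueError(f"unknown rotation mode: {mode}")
    return sorted(usage_by_cred, key=usage_by_cred.get, reverse=(mode == "sequential"))

usage = {"acct_1": 40, "acct_2": 3, "acct_3": 17}
```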
+
+ #### Per-Model Quota Tracking
+
+ Instead of tracking usage at the credential level, the system now supports granular per-model tracking:
+
+ **Data Structure** (when `mode="per_model"`):
+ ```json
+ {
+   "credential_id": {
+     "models": {
+       "gemini-2.5-pro": {
+         "window_start_ts": 1733678400.0,
+         "quota_reset_ts": 1733696400.0,
+         "success_count": 15,
+         "prompt_tokens": 5000,
+         "completion_tokens": 1000,
+         "approx_cost": 0.05,
+         "window_started": "2025-12-08 14:00:00 +0100",
+         "quota_resets": "2025-12-08 19:00:00 +0100"
+       }
+     },
+     "global": {...},
+     "model_cooldowns": {...}
+   }
+ }
+ ```
+
+ **Key Features**:
+ - Each model tracks its own usage window independently
+ - `window_start_ts`: When the current quota period started
+ - `quota_reset_ts`: Authoritative reset time from provider error response
+ - Human-readable timestamps added for debugging
+ - Supports custom window durations (5h, 7d, etc.)
+
+ #### Provider-Specific Quota Parsing
+
+ Providers can implement `parse_quota_error()` to extract precise reset times from error responses:
+
+ ```python
+ @staticmethod
+ def parse_quota_error(error, error_body) -> Optional[Dict]:
+     """Extract quota reset timestamp from provider error.
+
+     Returns:
+         {
+             'quota_reset_timestamp': 1733696400.0,  # Unix timestamp
+             'retry_after': 18000                    # Seconds until reset
+         }
+     """
+ ```
+
+ **Google RPC Format** (Antigravity, Gemini CLI):
+ - Parses `RetryInfo` and `ErrorInfo` from error details
+ - Handles duration strings: `"143h4m52.73s"` or `"515092.73s"`
+ - Extracts `quotaResetTimeStamp` and converts to Unix timestamp
+ - Falls back to `quotaResetDelay` if timestamp not available
+
+ **Example Error Response**:
+ ```json
+ {
+   "error": {
+     "code": 429,
+     "message": "Quota exceeded",
+     "details": [{
+       "@type": "type.googleapis.com/google.rpc.RetryInfo",
+       "retryDelay": "143h4m52.73s"
+     }, {
+       "@type": "type.googleapis.com/google.rpc.ErrorInfo",
+       "metadata": {
+         "quotaResetTimeStamp": "2025-12-08T19:00:00Z"
+       }
+     }]
+   }
+ }
+ ```
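Converting duration strings such as `"143h4m52.73s"` (equivalently `"515092.73s"`) into seconds can be sketched with a small regex. This is an illustrative parser, not the library's actual Google RPC handling:

```python
import re

_DURATION_RE = re.compile(r"^(?:(?P<h>\d+)h)?(?:(?P<m>\d+)m)?(?:(?P<s>\d+(?:\.\d+)?)s)?$")

def parse_rpc_duration(text: str) -> float:
    """Convert a Google RPC retryDelay string (e.g. '143h4m52.73s') to seconds."""
    match = _DURATION_RE.match(text.strip())
    if not match or not any(match.groupdict().values()):
        raise ValueError(f"unrecognized duration: {text!r}")
    hours = int(match.group("h") or 0)
    minutes = int(match.group("m") or 0)
    seconds = float(match.group("s") or 0.0)
    return hours * 3600 + minutes * 60 + seconds

# 143h4m52.73s = 143*3600 + 4*60 + 52.73 = 515092.73 seconds
```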
+
+ #### Model Quota Groups
+
+ Models that share the same quota limits can be grouped:
+
+ **Configuration**:
+ ```env
+ # Models in a group share quota/cooldown timing
+ QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
+
+ # To disable a default group:
+ QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
+ ```
+
+ **Behavior**:
+ - When one model hits quota, all models in the group receive the same `quota_reset_ts`
+ - Combined weighted usage for credential selection (e.g., Opus counts 2x vs Sonnet)
+ - Group resets only when ALL models' quotas have reset
+ - Preserves unexpired cooldowns during other resets
+
+ **Provider Implementation**:
+ ```python
+ class AntigravityProvider(ProviderInterface):
+     model_quota_groups = {
+         "claude": ["claude-sonnet-4-5", "claude-opus-4-5"]
+     }
+
+     model_usage_weights = {
+         "claude-opus-4-5": 2  # Opus counts 2x vs Sonnet
+     }
+ ```
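The group-cooldown propagation described above can be sketched as follows. `apply_group_cooldown` is a hypothetical helper written for illustration; the real logic lives inside `usage_manager.py`:

```python
def apply_group_cooldown(model_cooldowns: dict, quota_groups: dict,
                         model: str, quota_reset_ts: float) -> None:
    """Apply a quota reset timestamp to a model and every model sharing its group."""
    affected = {model}
    for members in quota_groups.values():
        if model in members:
            affected.update(members)
    for name in affected:
        # Never shorten an existing, later cooldown.
        model_cooldowns[name] = max(model_cooldowns.get(name, 0.0), quota_reset_ts)

cooldowns = {}
groups = {"claude": ["claude-sonnet-4-5", "claude-opus-4-5"]}
apply_group_cooldown(cooldowns, groups, "claude-sonnet-4-5", 1733696400.0)
# Both Sonnet and Opus now share the same reset timestamp.
```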
+
+ #### Priority-Based Concurrency Multipliers
+
+ Credentials can be assigned to priority tiers with configurable concurrency limits:
+
+ **Configuration**:
+ ```env
+ # Universal multipliers (all modes)
+ CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10
+ CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2=3
+
+ # Mode-specific overrides
+ CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1  # Lower in balanced mode
+ ```
+
+ **How it works**:
+ ```python
+ effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY * tier_multiplier
+ ```
+
+ **Provider Defaults** (Antigravity):
+ - Priority 1 (paid ultra): 5x multiplier
+ - Priority 2 (standard paid): 3x multiplier
+ - Priority 3+ (free): 2x (sequential mode) or 1x (balanced mode)
+
+ **Benefits**:
+ - Paid credentials handle more load without manual configuration
+ - Different concurrency for different rotation modes
+ - Automatic tier detection based on credential properties
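A sketch of how such settings could be resolved. The precedence assumed here (mode-specific env override, then universal env override, then the provider's built-in default) follows the description above; `effective_limit` is a hypothetical helper, not the library's API:

```python
import os

def effective_limit(provider: str, priority: int, mode: str,
                    base_limit: int, provider_default: int = 1) -> int:
    """base_limit * multiplier, with env overrides taking precedence over the default."""
    prefix = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_{priority}"
    raw = os.getenv(f"{prefix}_{mode.upper()}") or os.getenv(prefix)
    multiplier = int(raw) if raw else provider_default
    return base_limit * multiplier

os.environ["CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1"] = "10"
os.environ["CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED"] = "1"
```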
+
+ #### Reset Window Configuration
+
+ Providers can specify custom reset windows per priority tier:
+
+ ```python
+ class AntigravityProvider(ProviderInterface):
+     usage_reset_configs = {
+         frozenset([1, 2]): UsageResetConfigDef(
+             mode="per_model",
+             window_hours=5,    # 5-hour rolling window for paid tiers
+             field_name="5h_window"
+         ),
+         frozenset([3, 4, 5]): UsageResetConfigDef(
+             mode="per_model",
+             window_hours=168,  # 7-day window for free tier
+             field_name="7d_window"
+         )
+     }
+ ```
+
+ **Supported Modes**:
+ - `per_model`: Independent window per model with authoritative reset times
+ - `credential`: Single window per credential (legacy)
+ - `daily`: Daily reset at configured UTC hour (legacy)
+
+ #### Usage Flow
+
+ 1. **Request arrives** for model X with credential Y
+ 2. **Check rotation mode**: Sequential or balanced?
+ 3. **Select credential**:
+    - Filter by priority tier requirements
+    - Apply concurrency multiplier for effective limit
+    - Sort by rotation mode strategy
+ 4. **Check quota**:
+    - Load model's usage data
+    - Check if within window (window_start_ts to quota_reset_ts)
+    - Check model quota groups for combined usage
+ 5. **Execute request**
+ 6. **On success**: Increment model usage count
+ 7. **On quota error**:
+    - Parse error for `quota_reset_ts`
+    - Apply to model (and quota group)
+    - Credential remains on cooldown until reset time
+ 8. **On window expiration**:
+    - Archive model data to global stats
+    - Start fresh window with new `window_start_ts`
+ - Preserve unexpired quota cooldowns
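Step 8 of the flow can be modeled in a few lines. This is a simplified sketch; the field names follow the per-model JSON example earlier in this section, and `maybe_roll_window` is a hypothetical function, not the library's API:

```python
import time

def maybe_roll_window(model_stats: dict, window_seconds: float, now: float = None) -> bool:
    """Archive and restart an expired usage window; keep unexpired quota cooldowns."""
    now = time.time() if now is None else now
    if now - model_stats["window_start_ts"] < window_seconds:
        return False  # window still active
    model_stats["archived_success"] = model_stats.get("archived_success", 0) + model_stats["success_count"]
    model_stats["success_count"] = 0
    model_stats["window_start_ts"] = now
    # An authoritative quota_reset_ts still in the future survives the roll.
    if model_stats.get("quota_reset_ts", 0) <= now:
        model_stats.pop("quota_reset_ts", None)
    return True

stats = {"window_start_ts": 0.0, "success_count": 15, "quota_reset_ts": 50_000.0}
rolled = maybe_roll_window(stats, window_seconds=5 * 3600, now=20_000.0)
```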
+
+ ---
+
 ### 2.12. Google OAuth Base (`providers/google_oauth_base.py`)

 A refactored, reusable OAuth2 base class that eliminates code duplication across Google-based providers.

@@ -637,6 +869,12 @@ The library handles provider idiosyncrasies through specialized "Provider" class

 The `GeminiCliProvider` is the most complex implementation, mimicking the Google Cloud Code extension.

+ **New in PR #31**:
+ - **Quota Parsing**: Implements `parse_quota_error()` using Google RPC format parser
+ - **Tier Configuration**: Defines `tier_priorities` and `usage_reset_configs` for automatic priority resolution
+ - **Balanced Rotation**: Defaults to balanced mode (unlike Antigravity which uses sequential)
+ - **Priority Multipliers**: Same as Antigravity (P1: 5x, P2: 3x, others: 1x)
+
 #### Authentication (`gemini_auth_base.py`)

 * **Device Flow**: Uses a standard OAuth 2.0 flow. The `credential_tool` spins up a local web server (`localhost:8085`) to capture the callback from Google's auth page.
README.md CHANGED
@@ -38,6 +38,12 @@ This project provides a powerful solution for developers building complex applic
 - Automatic thinking block sanitization for Claude models (with recovery strategies)
   - Note: Claude thinking mode requires careful conversation state management (see [Antigravity documentation](DOCUMENTATION.md#antigravity-claude-extended-thinking-sanitization) for details)
 - **🆕 Credential Prioritization**: Automatic tier detection and priority-based credential selection ensures paid-tier credentials are used for premium models that require them.
+ - **🆕 Sequential Rotation Mode**: Choose between balanced (distribute load evenly) or sequential (use until exhausted) credential rotation strategies. Sequential mode maximizes cache hit rates for providers like Antigravity.
+ - **🆕 Per-Model Quota Tracking**: Granular per-model usage tracking with authoritative quota reset timestamps from provider error responses. Each model maintains its own window with `window_start_ts` and `quota_reset_ts`.
+ - **🆕 Model Quota Groups**: Group models that share quota limits (e.g., Claude Sonnet and Opus). When one model in a group hits quota, all receive the same cooldown timestamp.
+ - **🆕 Priority-Based Concurrency**: Assign credentials to priority tiers (1=highest) with configurable concurrency multipliers. Paid-tier credentials can handle more concurrent requests than free-tier ones.
+ - **🆕 Provider-Specific Quota Parsing**: Extended provider interface with `parse_quota_error()` method to extract precise retry-after times from provider-specific error formats (e.g., Google RPC format).
+ - **🆕 Flexible Rolling Windows**: Support for provider-specific quota reset configurations (5-hour, 7-day, etc.) replacing hardcoded daily resets.
 - **🆕 Weighted Random Rotation**: Configurable credential rotation strategy - choose between deterministic (perfect balance) or weighted random (unpredictable, harder to fingerprint) selection.
 - **🆕 Enhanced Gemini CLI**: Improved project discovery, paid vs free tier detection, and Gemini 3 support with thoughtSignature caching.
 - **🆕 Temperature Override**: Global temperature=0 override option to prevent tool hallucination issues with low-temperature settings.
@@ -129,6 +135,8 @@ The proxy now includes a powerful **interactive Text User Interface (TUI)** that
 - Configure custom OpenAI-compatible providers
 - Define provider models (simple or advanced JSON format)
 - Set concurrency limits per provider
+ - Configure rotation modes (balanced vs sequential)
+ - Manage priority-based concurrency multipliers
 - Interactive numbered menus for easy selection
 - Pending changes system with save/discard options

@@ -545,6 +553,47 @@ ANTIGRAVITY_GEMINI3_TOOL_FIX=true # Prevent tool hallucination

 ```

+ #### Credential Rotation Modes
+
+ - **`ROTATION_MODE_<PROVIDER>`**: Controls how credentials are rotated when multiple are available. Default: `balanced` (except Antigravity, which defaults to `sequential`).
+   - `balanced`: Rotate credentials evenly across requests to distribute load. Best for per-minute rate limits.
+   - `sequential`: Use one credential until exhausted (429 error), then switch to next. Best for daily/weekly quotas.
+ ```env
+ ROTATION_MODE_GEMINI=sequential      # Use Gemini keys until quota exhausted
+ ROTATION_MODE_OPENAI=balanced        # Distribute load across OpenAI keys (default)
+ ROTATION_MODE_ANTIGRAVITY=balanced   # Override Antigravity's sequential default
+ ```
+
+ #### Priority-Based Concurrency Multipliers
+
+ - **`CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>`**: Assign concurrency multipliers to priority tiers. Higher-tier credentials handle more concurrent requests.
+ ```env
+ # Universal multipliers (apply to all rotation modes)
+ CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10  # 10x for paid ultra tier
+ CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1   # 1x for lower tiers
+
+ # Mode-specific overrides
+ CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1  # P2 = 1x in balanced mode only
+ ```
+
+ **Provider Defaults** (built into provider classes):
+ - **Antigravity**: Priority 1: 5x, Priority 2: 3x, Priority 3+: 2x (sequential) or 1x (balanced)
+ - **Gemini CLI**: Priority 1: 5x, Priority 2: 3x, Others: 1x
+
+ #### Model Quota Groups
+
+ - **`QUOTA_GROUPS_<PROVIDER>_<GROUP>`**: Define models that share quota/cooldown timing. When one model hits quota, all in the group receive the same cooldown timestamp.
+ ```env
+ QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
+ QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
+
+ # To disable a default group:
+ QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
+ ```
+
+ **Default Groups**:
+ - **Antigravity**: Claude group (Sonnet 4.5 + Opus 4.5) with Opus counting 2x vs Sonnet
+
 #### Concurrency Control

 - **`MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER>`**: Set the maximum number of simultaneous requests allowed per API key for a specific provider. Default is `1` (no concurrency). Useful for high-throughput providers.
src/proxy_app/launcher_tui.py CHANGED
@@ -18,32 +18,33 @@ console = Console()

 def clear_screen():
     """
-    Cross-platform terminal clear that works robustly on both
     classic Windows conhost and modern terminals (Windows Terminal, Linux, Mac).
-
     Uses native OS commands instead of ANSI escape sequences:
     - Windows (conhost & Windows Terminal): cls
     - Unix-like systems (Linux, Mac): clear
     """
-    os.system('cls' if os.name == 'nt' else 'clear')


 class LauncherConfig:
     """Manages launcher_config.json (host, port, logging only)"""
-
     def __init__(self, config_path: Path = Path("launcher_config.json")):
         self.config_path = config_path
         self.defaults = {
             "host": "127.0.0.1",
             "port": 8000,
-            "enable_request_logging": False
         }
         self.config = self.load()
-
     def load(self) -> dict:
         """Load config from file or create with defaults."""
         if self.config_path.exists():
             try:
-                with open(self.config_path, 'r') as f:
                     config = json.load(f)
                     # Merge with defaults for any missing keys
                     for key, value in self.defaults.items():
@@ -53,22 +54,23 @@ class LauncherConfig:
             except (json.JSONDecodeError, IOError):
                 return self.defaults.copy()
         return self.defaults.copy()
-
     def save(self):
         """Save current config to file."""
         import datetime

         self.config["last_updated"] = datetime.datetime.now().isoformat()
         try:
-            with open(self.config_path, 'w') as f:
                 json.dump(self.config, f, indent=2)
         except IOError as e:
             console.print(f"[red]Error saving config: {e}[/red]")
-
     def update(self, **kwargs):
         """Update config values."""
         self.config.update(kwargs)
         self.save()
-
     @staticmethod
     def update_proxy_api_key(new_key: str):
         """Update PROXY_API_KEY in .env only"""
@@ -79,7 +81,7 @@ class LauncherConfig:

 class SettingsDetector:
     """Detects settings from .env for display"""
-
     @staticmethod
     def _load_local_env() -> dict:
         """Load environment variables from local .env file only"""
@@ -88,13 +90,13 @@ class SettingsDetector:
         if not env_file.exists():
             return env_dict
         try:
-            with open(env_file, 'r', encoding='utf-8') as f:
                 for line in f:
                     line = line.strip()
-                    if not line or line.startswith('#'):
                         continue
-                    if '=' in line:
-                        key, _, value = line.partition('=')
                         key, value = key.strip(), value.strip()
                         if value and value[0] in ('"', "'") and value[-1] == value[0]:
                             value = value[1:-1]
@@ -112,16 +114,16 @@ class SettingsDetector:
             "model_definitions": SettingsDetector.detect_model_definitions(),
             "concurrency_limits": SettingsDetector.detect_concurrency_limits(),
             "model_filters": SettingsDetector.detect_model_filters(),
-            "provider_settings": SettingsDetector.detect_provider_settings()
         }
-
     @staticmethod
     def detect_credentials() -> dict:
         """Detect API keys and OAuth credentials"""
         from pathlib import Path
-
         providers = {}
-
         # Scan for API keys
         env_vars = SettingsDetector._load_local_env()
         for key, value in env_vars.items():
@@ -130,7 +132,7 @@ class SettingsDetector:
             if provider not in providers:
                 providers[provider] = {"api_keys": 0, "oauth": 0, "custom": False}
             providers[provider]["api_keys"] += 1
-
         # Scan for OAuth credentials
         oauth_dir = Path("oauth_credentials")
         if oauth_dir.exists():
@@ -139,19 +141,19 @@ class SettingsDetector:
             if provider not in providers:
                 providers[provider] = {"api_keys": 0, "oauth": 0, "custom": False}
             providers[provider]["oauth"] += 1
-
         # Mark custom providers (have API_BASE set)
         for provider in providers:
             if os.getenv(f"{provider.upper()}_API_BASE"):
                 providers[provider]["custom"] = True
-
         return providers
-
     @staticmethod
     def detect_custom_api_bases() -> dict:
         """Detect custom API base URLs (not in hardcoded map)"""
         from proxy_app.provider_urls import PROVIDER_URL_MAP
-
         bases = {}
         env_vars = SettingsDetector._load_local_env()
         for key, value in env_vars.items():
@@ -161,7 +163,7 @@
             if provider not in PROVIDER_URL_MAP:
                 bases[provider] = value
         return bases
164
-
165
  @staticmethod
166
  def detect_model_definitions() -> dict:
167
  """Detect provider model definitions"""
@@ -179,7 +181,7 @@ class SettingsDetector:
179
  except (json.JSONDecodeError, ValueError):
180
  pass
181
  return models
182
-
183
  @staticmethod
184
  def detect_concurrency_limits() -> dict:
185
  """Detect max concurrent requests per key"""
@@ -193,7 +195,7 @@ class SettingsDetector:
193
  except (json.JSONDecodeError, ValueError):
194
  pass
195
  return limits
196
-
197
  @staticmethod
198
  def detect_model_filters() -> dict:
199
  """Detect active model filters (basic info only: defined or not)"""
@@ -210,7 +212,7 @@ class SettingsDetector:
210
  else:
211
  filters[provider]["has_whitelist"] = True
212
  return filters
213
-
214
  @staticmethod
215
  def detect_provider_settings() -> dict:
216
  """Detect provider-specific settings (Antigravity, Gemini CLI)"""
@@ -219,10 +221,10 @@ class SettingsDetector:
219
  except ImportError:
220
  # Fallback for direct execution or testing
221
  from .settings_tool import PROVIDER_SETTINGS_MAP
222
-
223
  provider_settings = {}
224
  env_vars = SettingsDetector._load_local_env()
225
-
226
  for provider, definitions in PROVIDER_SETTINGS_MAP.items():
227
  modified_count = 0
228
  for key, definition in definitions.items():
@@ -231,7 +233,7 @@ class SettingsDetector:
231
  # Check if value differs from default
232
  default = definition.get("default")
233
  setting_type = definition.get("type", "str")
234
-
235
  try:
236
  if setting_type == "bool":
237
  current = env_value.lower() in ("true", "1", "yes")
@@ -239,21 +241,21 @@ class SettingsDetector:
239
  current = int(env_value)
240
  else:
241
  current = env_value
242
-
243
  if current != default:
244
  modified_count += 1
245
  except (ValueError, AttributeError):
246
  pass
247
-
248
  if modified_count > 0:
249
  provider_settings[provider] = modified_count
250
-
251
  return provider_settings
252
 
253
 
254
  class LauncherTUI:
255
  """Main launcher interface"""
256
-
257
  def __init__(self):
258
  self.console = Console()
259
  self.config = LauncherConfig()
@@ -261,90 +263,100 @@ class LauncherTUI:
261
  self.env_file = Path.cwd() / ".env"
262
  # Load .env file to ensure environment variables are available
263
  load_dotenv(dotenv_path=self.env_file, override=True)
264
-
265
  def needs_onboarding(self) -> bool:
266
  """Check if onboarding is needed"""
267
  return not self.env_file.exists() or not os.getenv("PROXY_API_KEY")
268
-
269
  def run(self):
270
  """Main TUI loop"""
271
  while self.running:
272
  self.show_main_menu()
273
-
274
  def show_main_menu(self):
275
  """Display main menu and handle selection"""
276
  clear_screen()
277
-
278
  # Detect all settings
279
  settings = SettingsDetector.get_all_settings()
280
  credentials = settings["credentials"]
281
  custom_bases = settings["custom_bases"]
282
-
283
  # Check if setup is needed
284
  show_warning = self.needs_onboarding()
285
-
286
  # Build title with GitHub link
287
- self.console.print(Panel.fit(
288
- "[bold cyan]🚀 LLM API Key Proxy - Interactive Launcher[/bold cyan]",
289
- border_style="cyan"
290
- ))
291
- self.console.print("[dim]GitHub: [blue underline]https://github.com/Mirrowel/LLM-API-Key-Proxy[/blue underline][/dim]")
292
-
 
 
 
 
293
  # Show warning if .env file doesn't exist
294
  if show_warning:
295
  self.console.print()
296
- self.console.print(Panel(
297
- Text.from_markup(
298
- "⚠️ [bold yellow]INITIAL SETUP REQUIRED[/bold yellow]\n\n"
299
- "The proxy needs initial configuration:\n"
300
- " No .env file found\n\n"
301
- "Why this matters:\n"
302
- " The .env file stores your credentials and settings\n"
303
- " • PROXY_API_KEY protects your proxy from unauthorized access\n"
304
- " • Provider API keys enable LLM access\n\n"
305
- "What to do:\n"
306
- " 1. Select option \"3. Manage Credentials\" to launch the credential tool\n"
307
- " 2. The tool will create .env and set up PROXY_API_KEY automatically\n"
308
- " 3. You can add provider credentials (API keys or OAuth)\n\n"
309
- "⚠️ Note: The credential tool adds PROXY_API_KEY by default.\n"
310
- " You can remove it later if you want an unsecured proxy."
311
- ),
312
- border_style="yellow",
313
- expand=False
314
- ))
 
 
315
  # Show security warning if PROXY_API_KEY is missing (but .env exists)
316
  elif not os.getenv("PROXY_API_KEY"):
317
  self.console.print()
318
- self.console.print(Panel(
319
- Text.from_markup(
320
- "⚠️ [bold red]SECURITY WARNING: PROXY_API_KEY Not Set[/bold red]\n\n"
321
- "Your proxy is currently UNSECURED!\n"
322
- "Anyone can access it without authentication.\n\n"
323
- "This is a serious security risk if your proxy is accessible\n"
324
- "from the internet or untrusted networks.\n\n"
325
- "👉 [bold]Recommended:[/bold] Set PROXY_API_KEY in .env file\n"
326
- " Use option \"2. Configure Proxy Settings\" → \"3. Set Proxy API Key\"\n"
327
- " or option \"3. Manage Credentials\""
328
- ),
329
- border_style="red",
330
- expand=False
331
- ))
332
-
 
 
333
  # Show config
334
  self.console.print()
335
  self.console.print("[bold]📋 Proxy Configuration[/bold]")
336
  self.console.print("━" * 70)
337
  self.console.print(f" Host: {self.config.config['host']}")
338
  self.console.print(f" Port: {self.config.config['port']}")
339
- self.console.print(f" Request Logging: {'✅ Enabled' if self.config.config['enable_request_logging'] else '❌ Disabled'}")
340
-
 
 
341
  # Show actual API key value
342
- proxy_key = os.getenv('PROXY_API_KEY')
343
  if proxy_key:
344
  self.console.print(f" Proxy API Key: {proxy_key}")
345
  else:
346
  self.console.print(" Proxy API Key: [red]Not Set (INSECURE!)[/red]")
347
-
348
  # Show status summary
349
  self.console.print()
350
  self.console.print("[bold]📊 Status Summary[/bold]")
@@ -352,12 +364,19 @@ class LauncherTUI:
352
  provider_count = len(credentials)
353
  custom_count = len(custom_bases)
354
  provider_settings = settings.get("provider_settings", {})
355
- has_advanced = bool(settings["model_definitions"] or settings["concurrency_limits"] or settings["model_filters"] or provider_settings)
356
-
 
 
 
 
 
357
  self.console.print(f" Providers: {provider_count} configured")
358
  self.console.print(f" Custom Providers: {custom_count} configured")
359
- self.console.print(f" Advanced Settings: {'Active (view in menu 4)' if has_advanced else 'None'}")
360
-
 
 
361
  # Show menu
362
  self.console.print()
363
  self.console.print("━" * 70)
@@ -367,23 +386,29 @@ class LauncherTUI:
367
  if show_warning:
368
  self.console.print(" 1. ▶️ Run Proxy Server")
369
  self.console.print(" 2. ⚙️ Configure Proxy Settings")
370
- self.console.print(" 3. 🔑 Manage Credentials ⬅️ [bold yellow]Start here![/bold yellow]")
 
 
371
  else:
372
  self.console.print(" 1. ▶️ Run Proxy Server")
373
  self.console.print(" 2. ⚙️ Configure Proxy Settings")
374
  self.console.print(" 3. 🔑 Manage Credentials")
375
-
376
  self.console.print(" 4. 📊 View Provider & Advanced Settings")
377
  self.console.print(" 5. 🔄 Reload Configuration")
378
  self.console.print(" 6. ℹ️ About")
379
  self.console.print(" 7. 🚪 Exit")
380
-
381
  self.console.print()
382
  self.console.print("━" * 70)
383
  self.console.print()
384
-
385
- choice = Prompt.ask("Select option", choices=["1", "2", "3", "4", "5", "6", "7"], show_choices=False)
386
-
 
 
 
 
387
  if choice == "1":
388
  self.run_proxy()
389
  elif choice == "2":
@@ -393,7 +418,7 @@ class LauncherTUI:
393
  elif choice == "4":
394
  self.show_provider_settings_menu()
395
  elif choice == "5":
396
- load_dotenv(dotenv_path=Path.cwd() / ".env",override=True)
397
  self.config = LauncherConfig() # Reload config
398
  self.console.print("\n[green]✅ Configuration reloaded![/green]")
399
  elif choice == "6":
@@ -401,25 +426,64 @@ class LauncherTUI:
401
  elif choice == "7":
402
  self.running = False
403
  sys.exit(0)
404
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  def show_config_menu(self):
406
  """Display configuration sub-menu"""
407
  while True:
408
  clear_screen()
409
-
410
- self.console.print(Panel.fit(
411
- "[bold cyan]⚙️ Proxy Configuration[/bold cyan]",
412
- border_style="cyan"
413
- ))
414
-
 
415
  self.console.print()
416
  self.console.print("[bold]📋 Current Settings[/bold]")
417
  self.console.print("━" * 70)
418
  self.console.print(f" Host: {self.config.config['host']}")
419
  self.console.print(f" Port: {self.config.config['port']}")
420
- self.console.print(f" Request Logging: {'✅ Enabled' if self.config.config['enable_request_logging'] else '❌ Disabled'}")
421
- self.console.print(f" Proxy API Key: {'✅ Set' if os.getenv('PROXY_API_KEY') else '❌ Not Set'}")
422
-
 
 
 
 
423
  self.console.print()
424
  self.console.print("━" * 70)
425
  self.console.print()
@@ -429,45 +493,172 @@ class LauncherTUI:
429
  self.console.print(" 2. 🔌 Set Port")
430
  self.console.print(" 3. 🔑 Set Proxy API Key")
431
  self.console.print(" 4. 📝 Toggle Request Logging")
432
- self.console.print(" 5. ↩️ Back to Main Menu")
433
-
 
434
  self.console.print()
435
  self.console.print("━" * 70)
436
  self.console.print()
437
-
438
- choice = Prompt.ask("Select option", choices=["1", "2", "3", "4", "5"], show_choices=False)
439
-
 
 
 
 
440
  if choice == "1":
441
- new_host = Prompt.ask("Enter new host IP", default=self.config.config["host"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  self.config.update(host=new_host)
443
  self.console.print(f"\n[green]✅ Host updated to: {new_host}[/green]")
444
  elif choice == "2":
445
- new_port = IntPrompt.ask("Enter new port", default=self.config.config["port"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
  if 1 <= new_port <= 65535:
447
  self.config.update(port=new_port)
448
- self.console.print(f"\n[green]✅ Port updated to: {new_port}[/green]")
 
 
449
  else:
450
  self.console.print("\n[red]❌ Port must be between 1-65535[/red]")
451
  elif choice == "3":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
  current = os.getenv("PROXY_API_KEY", "")
453
- new_key = Prompt.ask("Enter new Proxy API Key", default=current)
454
- if new_key and new_key != current:
 
 
 
 
 
 
 
 
 
 
 
455
  LauncherConfig.update_proxy_api_key(new_key)
456
- self.console.print("\n[green]✅ Proxy API Key updated successfully![/green]")
457
- self.console.print(" Updated in .env file")
 
 
 
 
 
 
 
 
 
458
  else:
459
  self.console.print("\n[yellow]No changes made[/yellow]")
460
  elif choice == "4":
461
  current = self.config.config["enable_request_logging"]
462
  self.config.update(enable_request_logging=not current)
463
- self.console.print(f"\n[green]✅ Request Logging {'enabled' if not current else 'disabled'}![/green]")
 
 
464
  elif choice == "5":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  break
466
-
467
  def show_provider_settings_menu(self):
468
  """Display provider/advanced settings (read-only + launch tool)"""
469
  clear_screen()
470
-
471
  settings = SettingsDetector.get_all_settings()
472
  credentials = settings["credentials"]
473
  custom_bases = settings["custom_bases"]
@@ -475,12 +666,14 @@ class LauncherTUI:
475
  concurrency = settings["concurrency_limits"]
476
  filters = settings["model_filters"]
477
  provider_settings = settings.get("provider_settings", {})
478
-
479
- self.console.print(Panel.fit(
480
- "[bold cyan]📊 Provider & Advanced Settings[/bold cyan]",
481
- border_style="cyan"
482
- ))
483
-
 
 
484
  # Configured Providers
485
  self.console.print()
486
  self.console.print("[bold]📊 Configured Providers[/bold]")
@@ -490,18 +683,22 @@ class LauncherTUI:
490
  provider_name = provider.title()
491
  parts = []
492
  if info["api_keys"] > 0:
493
- parts.append(f"{info['api_keys']} API key{'s' if info['api_keys'] > 1 else ''}")
 
 
494
  if info["oauth"] > 0:
495
- parts.append(f"{info['oauth']} OAuth credential{'s' if info['oauth'] > 1 else ''}")
496
-
 
 
497
  display = " + ".join(parts)
498
  if info["custom"]:
499
  display += " (Custom)"
500
-
501
  self.console.print(f" ✅ {provider_name:20} {display}")
502
  else:
503
  self.console.print(" [dim]No providers configured[/dim]")
504
-
505
  # Custom API Bases
506
  if custom_bases:
507
  self.console.print()
@@ -509,15 +706,17 @@ class LauncherTUI:
509
  self.console.print("━" * 70)
510
  for provider, base in custom_bases.items():
511
  self.console.print(f" • {provider:15} {base}")
512
-
513
  # Model Definitions
514
  if model_defs:
515
  self.console.print()
516
  self.console.print("[bold]📦 Provider Model Definitions[/bold]")
517
  self.console.print("━" * 70)
518
  for provider, count in model_defs.items():
519
- self.console.print(f" • {provider:15} {count} model{'s' if count > 1 else ''} configured")
520
-
 
 
521
  # Concurrency Limits
522
  if concurrency:
523
  self.console.print()
@@ -526,7 +725,7 @@ class LauncherTUI:
526
  for provider, limit in concurrency.items():
527
  self.console.print(f" • {provider:15} {limit} requests/key")
528
  self.console.print(" • Default: 1 request/key (all others)")
529
-
530
  # Model Filters (basic info only)
531
  if filters:
532
  self.console.print()
@@ -540,7 +739,7 @@ class LauncherTUI:
540
  status_parts.append("Ignore list")
541
  status = " + ".join(status_parts) if status_parts else "None"
542
  self.console.print(f" • {provider:15} ✅ {status}")
543
-
544
  # Provider-Specific Settings
545
  self.console.print()
546
  self.console.print("[bold]🔬 Provider-Specific Settings[/bold]")
@@ -553,158 +752,207 @@ class LauncherTUI:
553
  display_name = provider.replace("_", " ").title()
554
  modified = provider_settings.get(provider, 0)
555
  if modified > 0:
556
- self.console.print(f" • {display_name:20} [yellow]{modified} setting{'s' if modified > 1 else ''} modified[/yellow]")
 
 
557
  else:
558
  self.console.print(f" • {display_name:20} [dim]using defaults[/dim]")
559
-
560
  # Actions
561
  self.console.print()
562
  self.console.print("━" * 70)
563
  self.console.print()
564
  self.console.print("[bold]💡 Actions[/bold]")
565
  self.console.print()
566
- self.console.print(" 1. 🔧 Launch Settings Tool (configure advanced settings)")
 
 
567
  self.console.print(" 2. ↩️ Back to Main Menu")
568
-
569
  self.console.print()
570
  self.console.print("━" * 70)
571
- self.console.print("[dim]ℹ️ Advanced settings are stored in .env file.\n Use the Settings Tool to configure them interactively.[/dim]")
 
 
572
  self.console.print()
573
- self.console.print("[dim]⚠️ Note: Settings Tool supports only common configuration types.\n For complex settings, edit .env directly.[/dim]")
 
 
574
  self.console.print()
575
-
576
  choice = Prompt.ask("Select option", choices=["1", "2"], show_choices=False)
577
-
578
  if choice == "1":
579
  self.launch_settings_tool()
580
  # choice == "2" returns to main menu
581
-
582
  def launch_credential_tool(self):
583
  """Launch credential management tool"""
584
  import time
585
-
586
  # CRITICAL: Show full loading UI to replace the 6-7 second blank wait
587
  clear_screen()
588
-
589
  _start_time = time.time()
590
-
591
  # Show the same header as standalone mode
592
  self.console.print("━" * 70)
593
  self.console.print("Interactive Credential Setup Tool")
594
  self.console.print("GitHub: https://github.com/Mirrowel/LLM-API-Key-Proxy")
595
  self.console.print("━" * 70)
596
  self.console.print("Loading credential management components...")
597
-
598
  # Now import with spinner (this is where the 6-7 second delay happens)
599
  with self.console.status("Initializing credential tool...", spinner="dots"):
600
- from rotator_library.credential_tool import run_credential_tool, _ensure_providers_loaded
 
 
 
 
601
  _, PROVIDER_PLUGINS = _ensure_providers_loaded()
602
  self.console.print("✓ Credential tool initialized")
603
 
604
  _elapsed = time.time() - _start_time
605
- self.console.print(f"✓ Tool ready in {_elapsed:.2f}s ({len(PROVIDER_PLUGINS)} providers available)")
606
-
 
 
607
  # Small delay to let user see the ready message
608
  time.sleep(0.5)
609
-
610
  # Run the tool with from_launcher=True to skip duplicate loading screen
611
  run_credential_tool(from_launcher=True)
612
  # Reload environment after credential tool
613
  load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
614
-
615
  def launch_settings_tool(self):
616
  """Launch settings configuration tool"""
617
  from proxy_app.settings_tool import run_settings_tool
 
618
  run_settings_tool()
619
  # Reload environment after settings tool
620
  load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
621
-
622
  def show_about(self):
623
  """Display About page with project information"""
624
  clear_screen()
625
-
626
- self.console.print(Panel.fit(
627
- "[bold cyan]ℹ️ About LLM API Key Proxy[/bold cyan]",
628
- border_style="cyan"
629
- ))
630
-
 
631
  self.console.print()
632
  self.console.print("[bold]📦 Project Information[/bold]")
633
  self.console.print("━" * 70)
634
  self.console.print(" [bold cyan]LLM API Key Proxy[/bold cyan]")
635
- self.console.print(" A lightweight, high-performance proxy server for managing")
 
 
636
  self.console.print(" LLM API keys with automatic rotation and OAuth support")
637
  self.console.print()
638
- self.console.print(" [dim]GitHub:[/dim] [blue underline]https://github.com/Mirrowel/LLM-API-Key-Proxy[/blue underline]")
639
-
 
 
640
  self.console.print()
641
  self.console.print("[bold]✨ Key Features[/bold]")
642
  self.console.print("━" * 70)
643
- self.console.print(" • [green]Smart Key Rotation[/green] - Automatic rotation across multiple API keys")
644
- self.console.print(" • [green]OAuth Support[/green] - Automated OAuth flows for supported providers")
645
- self.console.print(" • [green]Multiple Providers[/green] - Support for 10+ LLM providers")
646
- self.console.print(" • [green]Custom Providers[/green] - Easy integration of custom OpenAI-compatible APIs")
647
- self.console.print(" • [green]Advanced Filtering[/green] - Model whitelists and ignore lists per provider")
648
- self.console.print(" • [green]Concurrency Control[/green] - Per-key rate limiting and request management")
649
- self.console.print(" • [green]Cost Tracking[/green] - Track usage and costs across all providers")
650
- self.console.print(" • [green]Interactive TUI[/green] - Beautiful terminal interface for easy configuration")
651
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
652
  self.console.print()
653
  self.console.print("[bold]📝 License & Credits[/bold]")
654
  self.console.print("━" * 70)
655
  self.console.print(" Made with ❤️ by the community")
656
  self.console.print(" Open source - contributions welcome!")
657
-
658
  self.console.print()
659
  self.console.print("━" * 70)
660
  self.console.print()
661
-
662
  Prompt.ask("Press Enter to return to main menu", default="")
663
-
664
  def run_proxy(self):
665
  """Prepare and launch proxy in same window"""
666
  # Check if forced onboarding needed
667
  if self.needs_onboarding():
668
  clear_screen()
669
- self.console.print(Panel(
670
- Text.from_markup(
671
- "⚠️ [bold yellow]Setup Required[/bold yellow]\n\n"
672
- "Cannot start without .env.\n"
673
- "Launching credential tool..."
674
- ),
675
- border_style="yellow"
676
- ))
677
-
 
 
678
  # Force credential tool
679
- from rotator_library.credential_tool import ensure_env_defaults, run_credential_tool
 
 
 
 
680
  ensure_env_defaults()
681
  load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
682
  run_credential_tool()
683
  load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
684
-
685
  # Check again after credential tool
686
  if not os.getenv("PROXY_API_KEY"):
687
- self.console.print("\n[red]❌ PROXY_API_KEY still not set. Cannot start proxy.[/red]")
 
 
688
  return
689
-
690
  # Clear console and modify sys.argv
691
  clear_screen()
692
- self.console.print(f"\n[bold green]🚀 Starting proxy on {self.config.config['host']}:{self.config.config['port']}...[/bold green]\n")
693
-
 
 
694
  # Clear console again to remove the starting message before main.py shows loading details
695
  import time
 
696
  time.sleep(0.5) # Brief pause so user sees the message
697
  clear_screen()
698
-
699
  # Reconstruct sys.argv for main.py
700
  sys.argv = [
701
  "main.py",
702
- "--host", self.config.config["host"],
703
- "--port", str(self.config.config["port"])
 
 
704
  ]
705
  if self.config.config["enable_request_logging"]:
706
  sys.argv.append("--enable-request-logging")
707
-
708
  # Exit TUI - main.py will continue execution
709
  self.running = False
710
 
 
18
 
19
  def clear_screen():
20
  """
21
+ Cross-platform terminal clear that works robustly on both
22
  classic Windows conhost and modern terminals (Windows Terminal, Linux, Mac).
23
+
24
  Uses native OS commands instead of ANSI escape sequences:
25
  - Windows (conhost & Windows Terminal): cls
26
  - Unix-like systems (Linux, Mac): clear
27
  """
28
+ os.system("cls" if os.name == "nt" else "clear")
29
+
30
 
31
  class LauncherConfig:
32
  """Manages launcher_config.json (host, port, logging only)"""
33
+
34
  def __init__(self, config_path: Path = Path("launcher_config.json")):
35
  self.config_path = config_path
36
  self.defaults = {
37
  "host": "127.0.0.1",
38
  "port": 8000,
39
+ "enable_request_logging": False,
40
  }
41
  self.config = self.load()
42
+
43
  def load(self) -> dict:
44
  """Load config from file or create with defaults."""
45
  if self.config_path.exists():
46
  try:
47
+ with open(self.config_path, "r") as f:
48
  config = json.load(f)
49
  # Merge with defaults for any missing keys
50
  for key, value in self.defaults.items():
 
54
  except (json.JSONDecodeError, IOError):
55
  return self.defaults.copy()
56
  return self.defaults.copy()
57
+
58
  def save(self):
59
  """Save current config to file."""
60
  import datetime
61
+
62
  self.config["last_updated"] = datetime.datetime.now().isoformat()
63
  try:
64
+ with open(self.config_path, "w") as f:
65
  json.dump(self.config, f, indent=2)
66
  except IOError as e:
67
  console.print(f"[red]Error saving config: {e}[/red]")
68
+
69
  def update(self, **kwargs):
70
  """Update config values."""
71
  self.config.update(kwargs)
72
  self.save()
73
+
74
  @staticmethod
75
  def update_proxy_api_key(new_key: str):
76
  """Update PROXY_API_KEY in .env only"""
 
81
 
82
  class SettingsDetector:
83
  """Detects settings from .env for display"""
84
+
85
  @staticmethod
86
  def _load_local_env() -> dict:
87
  """Load environment variables from local .env file only"""
 
90
  if not env_file.exists():
91
  return env_dict
92
  try:
93
+ with open(env_file, "r", encoding="utf-8") as f:
94
  for line in f:
95
  line = line.strip()
96
+ if not line or line.startswith("#"):
97
  continue
98
+ if "=" in line:
99
+ key, _, value = line.partition("=")
100
  key, value = key.strip(), value.strip()
101
  if value and value[0] in ('"', "'") and value[-1] == value[0]:
102
  value = value[1:-1]
 
114
  "model_definitions": SettingsDetector.detect_model_definitions(),
115
  "concurrency_limits": SettingsDetector.detect_concurrency_limits(),
116
  "model_filters": SettingsDetector.detect_model_filters(),
117
+ "provider_settings": SettingsDetector.detect_provider_settings(),
118
  }
119
+
120
  @staticmethod
121
  def detect_credentials() -> dict:
122
  """Detect API keys and OAuth credentials"""
123
  from pathlib import Path
124
+
125
  providers = {}
126
+
127
  # Scan for API keys
128
  env_vars = SettingsDetector._load_local_env()
129
  for key, value in env_vars.items():
 
132
  if provider not in providers:
133
  providers[provider] = {"api_keys": 0, "oauth": 0, "custom": False}
134
  providers[provider]["api_keys"] += 1
135
+
136
  # Scan for OAuth credentials
137
  oauth_dir = Path("oauth_credentials")
138
  if oauth_dir.exists():
 
141
  if provider not in providers:
142
  providers[provider] = {"api_keys": 0, "oauth": 0, "custom": False}
143
  providers[provider]["oauth"] += 1
144
+
145
  # Mark custom providers (have API_BASE set)
146
  for provider in providers:
147
  if os.getenv(f"{provider.upper()}_API_BASE"):
148
  providers[provider]["custom"] = True
149
+
150
  return providers
151
+
152
  @staticmethod
153
  def detect_custom_api_bases() -> dict:
154
  """Detect custom API base URLs (not in hardcoded map)"""
155
  from proxy_app.provider_urls import PROVIDER_URL_MAP
156
+
157
  bases = {}
158
  env_vars = SettingsDetector._load_local_env()
159
  for key, value in env_vars.items():
 
163
  if provider not in PROVIDER_URL_MAP:
164
  bases[provider] = value
165
  return bases
166
+
167
  @staticmethod
168
  def detect_model_definitions() -> dict:
169
  """Detect provider model definitions"""
 
181
  except (json.JSONDecodeError, ValueError):
182
  pass
183
  return models
184
+
185
  @staticmethod
186
  def detect_concurrency_limits() -> dict:
187
  """Detect max concurrent requests per key"""
 
195
  except (json.JSONDecodeError, ValueError):
196
  pass
197
  return limits
198
+
199
  @staticmethod
200
  def detect_model_filters() -> dict:
201
  """Detect active model filters (basic info only: defined or not)"""
 
212
  else:
213
  filters[provider]["has_whitelist"] = True
214
  return filters
215
+
216
  @staticmethod
217
  def detect_provider_settings() -> dict:
218
  """Detect provider-specific settings (Antigravity, Gemini CLI)"""
 
221
  except ImportError:
222
  # Fallback for direct execution or testing
223
  from .settings_tool import PROVIDER_SETTINGS_MAP
224
+
225
  provider_settings = {}
226
  env_vars = SettingsDetector._load_local_env()
227
+
228
  for provider, definitions in PROVIDER_SETTINGS_MAP.items():
229
  modified_count = 0
230
  for key, definition in definitions.items():
 
233
  # Check if value differs from default
234
  default = definition.get("default")
235
  setting_type = definition.get("type", "str")
236
+
237
  try:
238
  if setting_type == "bool":
239
  current = env_value.lower() in ("true", "1", "yes")
 
241
  current = int(env_value)
242
  else:
243
  current = env_value
244
+
245
  if current != default:
246
  modified_count += 1
247
  except (ValueError, AttributeError):
248
  pass
249
+
250
  if modified_count > 0:
251
  provider_settings[provider] = modified_count
252
+
253
  return provider_settings
254
 
255
 
256
  class LauncherTUI:
257
  """Main launcher interface"""
258
+
259
  def __init__(self):
260
  self.console = Console()
261
  self.config = LauncherConfig()
 
263
  self.env_file = Path.cwd() / ".env"
264
  # Load .env file to ensure environment variables are available
265
  load_dotenv(dotenv_path=self.env_file, override=True)
266
+
267
  def needs_onboarding(self) -> bool:
268
  """Check if onboarding is needed"""
269
  return not self.env_file.exists() or not os.getenv("PROXY_API_KEY")
270
+
271
  def run(self):
272
  """Main TUI loop"""
273
  while self.running:
274
  self.show_main_menu()
275
+
276
  def show_main_menu(self):
277
  """Display main menu and handle selection"""
278
  clear_screen()
279
+
280
  # Detect all settings
281
  settings = SettingsDetector.get_all_settings()
282
  credentials = settings["credentials"]
283
  custom_bases = settings["custom_bases"]
284
+
285
  # Check if setup is needed
286
  show_warning = self.needs_onboarding()
287
+
288
  # Build title with GitHub link
289
+ self.console.print(
290
+ Panel.fit(
291
+ "[bold cyan]🚀 LLM API Key Proxy - Interactive Launcher[/bold cyan]",
292
+ border_style="cyan",
293
+ )
294
+ )
295
+ self.console.print(
296
+ "[dim]GitHub: [blue underline]https://github.com/Mirrowel/LLM-API-Key-Proxy[/blue underline][/dim]"
297
+ )
298
+
299
  # Show warning if .env file doesn't exist
300
  if show_warning:
301
  self.console.print()
302
+ self.console.print(
303
+ Panel(
304
+ Text.from_markup(
305
+ "⚠️ [bold yellow]INITIAL SETUP REQUIRED[/bold yellow]\n\n"
306
+ "The proxy needs initial configuration:\n"
307
+ " No .env file found\n\n"
308
+ "Why this matters:\n"
309
+ " • The .env file stores your credentials and settings\n"
310
+ " • PROXY_API_KEY protects your proxy from unauthorized access\n"
311
+ " Provider API keys enable LLM access\n\n"
312
+ "What to do:\n"
313
+ ' 1. Select option "3. Manage Credentials" to launch the credential tool\n'
314
+ " 2. The tool will create .env and set up PROXY_API_KEY automatically\n"
315
+ " 3. You can add provider credentials (API keys or OAuth)\n\n"
316
+ "⚠️ Note: The credential tool adds PROXY_API_KEY by default.\n"
317
+ " You can remove it later if you want an unsecured proxy."
318
+ ),
319
+ border_style="yellow",
320
+ expand=False,
321
+ )
322
+ )
323
  # Show security warning if PROXY_API_KEY is missing (but .env exists)
324
  elif not os.getenv("PROXY_API_KEY"):
325
  self.console.print()
326
+ self.console.print(
327
+ Panel(
328
+ Text.from_markup(
329
+ "⚠️ [bold red]SECURITY WARNING: PROXY_API_KEY Not Set[/bold red]\n\n"
330
+ "Your proxy is currently UNSECURED!\n"
331
+ "Anyone can access it without authentication.\n\n"
332
+ "This is a serious security risk if your proxy is accessible\n"
333
+ "from the internet or untrusted networks.\n\n"
334
+ "👉 [bold]Recommended:[/bold] Set PROXY_API_KEY in .env file\n"
335
+ ' Use option "2. Configure Proxy Settings" → "3. Set Proxy API Key"\n'
336
+ ' or option "3. Manage Credentials"'
337
+ ),
338
+ border_style="red",
339
+ expand=False,
340
+ )
341
+ )
342
+
343
  # Show config
344
  self.console.print()
345
  self.console.print("[bold]📋 Proxy Configuration[/bold]")
346
  self.console.print("━" * 70)
347
  self.console.print(f" Host: {self.config.config['host']}")
348
  self.console.print(f" Port: {self.config.config['port']}")
349
+ self.console.print(
350
+ f" Request Logging: {'✅ Enabled' if self.config.config['enable_request_logging'] else '❌ Disabled'}"
351
+ )
352
+
353
  # Show actual API key value
354
+ proxy_key = os.getenv("PROXY_API_KEY")
355
  if proxy_key:
356
  self.console.print(f" Proxy API Key: {proxy_key}")
357
  else:
358
  self.console.print(" Proxy API Key: [red]Not Set (INSECURE!)[/red]")
359
+
360
  # Show status summary
361
  self.console.print()
362
  self.console.print("[bold]📊 Status Summary[/bold]")
 
364
  provider_count = len(credentials)
365
  custom_count = len(custom_bases)
366
  provider_settings = settings.get("provider_settings", {})
367
+ has_advanced = bool(
368
+ settings["model_definitions"]
369
+ or settings["concurrency_limits"]
370
+ or settings["model_filters"]
371
+ or provider_settings
372
+ )
373
+
374
  self.console.print(f" Providers: {provider_count} configured")
375
  self.console.print(f" Custom Providers: {custom_count} configured")
376
+ self.console.print(
377
+ f" Advanced Settings: {'Active (view in menu 4)' if has_advanced else 'None'}"
378
+ )
379
+
380
  # Show menu
381
  self.console.print()
382
  self.console.print("━" * 70)
 
386
  if show_warning:
387
  self.console.print(" 1. ▶️ Run Proxy Server")
388
  self.console.print(" 2. ⚙️ Configure Proxy Settings")
389
+ self.console.print(
390
+ " 3. 🔑 Manage Credentials ⬅️ [bold yellow]Start here![/bold yellow]"
391
+ )
392
  else:
393
  self.console.print(" 1. ▶️ Run Proxy Server")
394
  self.console.print(" 2. ⚙️ Configure Proxy Settings")
395
  self.console.print(" 3. 🔑 Manage Credentials")
396
+
397
  self.console.print(" 4. 📊 View Provider & Advanced Settings")
398
  self.console.print(" 5. 🔄 Reload Configuration")
399
  self.console.print(" 6. ℹ️ About")
400
  self.console.print(" 7. 🚪 Exit")
401
+
402
  self.console.print()
403
  self.console.print("━" * 70)
404
  self.console.print()
405
+
406
+ choice = Prompt.ask(
407
+ "Select option",
408
+ choices=["1", "2", "3", "4", "5", "6", "7"],
409
+ show_choices=False,
410
+ )
411
+
412
  if choice == "1":
413
  self.run_proxy()
414
  elif choice == "2":
 
418
  elif choice == "4":
419
  self.show_provider_settings_menu()
420
  elif choice == "5":
421
+ load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
422
  self.config = LauncherConfig() # Reload config
423
  self.console.print("\n[green]✅ Configuration reloaded![/green]")
424
  elif choice == "6":
 
426
  elif choice == "7":
427
  self.running = False
428
  sys.exit(0)
429
+
430
+ def confirm_setting_change(self, setting_name: str, warning_lines: list) -> bool:
431
+ """
432
+ Display a warning and require Y/N (case-sensitive) confirmation.
433
+ Re-prompts until user enters exactly 'Y' or 'N'.
434
+ Returns True only if user enters 'Y'.
435
+ """
436
+ clear_screen()
437
+ self.console.print()
438
+ self.console.print(
439
+ Panel(
440
+ Text.from_markup(
441
+ f"[bold yellow]⚠️ WARNING: You are about to change the {setting_name}[/bold yellow]\n\n"
442
+ + "\n".join(warning_lines)
443
+ + "\n\n[bold]If you are not sure about changing this - don't.[/bold]"
444
+ ),
445
+ border_style="yellow",
446
+ expand=False,
447
+ )
448
+ )
449
+
450
+ while True:
451
+ response = Prompt.ask(
452
+ "Enter [bold]Y[/bold] to confirm, [bold]N[/bold] to cancel (case-sensitive)"
453
+ )
454
+ if response == "Y":
455
+ return True
456
+ elif response == "N":
457
+ self.console.print("\n[dim]Operation cancelled.[/dim]")
458
+ return False
459
+ else:
460
+ self.console.print(
461
+ "[red]Please enter exactly 'Y' or 'N' (case-sensitive)[/red]"
462
+ )
463
+
464
  def show_config_menu(self):
465
  """Display configuration sub-menu"""
466
  while True:
467
  clear_screen()
468
+
469
+ self.console.print(
470
+ Panel.fit(
471
+ "[bold cyan]⚙️ Proxy Configuration[/bold cyan]", border_style="cyan"
472
+ )
473
+ )
474
+
475
  self.console.print()
476
  self.console.print("[bold]📋 Current Settings[/bold]")
477
  self.console.print("━" * 70)
478
  self.console.print(f" Host: {self.config.config['host']}")
479
  self.console.print(f" Port: {self.config.config['port']}")
480
+ self.console.print(
481
+ f" Request Logging: {'✅ Enabled' if self.config.config['enable_request_logging'] else '❌ Disabled'}"
482
+ )
483
+ self.console.print(
484
+ f" Proxy API Key: {'✅ Set' if os.getenv('PROXY_API_KEY') else '❌ Not Set'}"
485
+ )
486
+
487
  self.console.print()
488
  self.console.print("━" * 70)
489
  self.console.print()
 
493
  self.console.print(" 2. 🔌 Set Port")
494
  self.console.print(" 3. 🔑 Set Proxy API Key")
495
  self.console.print(" 4. 📝 Toggle Request Logging")
496
+ self.console.print(" 5. 🔄 Reset to Default Settings")
497
+ self.console.print(" 6. ↩️ Back to Main Menu")
498
+
499
  self.console.print()
500
  self.console.print("━" * 70)
501
  self.console.print()
502
+
503
+ choice = Prompt.ask(
504
+ "Select option",
505
+ choices=["1", "2", "3", "4", "5", "6"],
506
+ show_choices=False,
507
+ )
508
+
509
  if choice == "1":
510
+ # Show warning and require confirmation
511
+ confirmed = self.confirm_setting_change(
512
+ "Host IP",
513
+ [
514
+ "Changing the host IP affects which network interfaces the proxy listens on:",
515
+ " • [cyan]127.0.0.1[/cyan] = Local access only (recommended for development)",
516
+ " • [cyan]0.0.0.0[/cyan] = Accessible from all network interfaces",
517
+ "",
518
+ "Applications configured to connect to the old host may fail to connect.",
519
+ ],
520
+ )
521
+ if not confirmed:
522
+ continue
523
+
524
+ new_host = Prompt.ask(
525
+ "Enter new host IP", default=self.config.config["host"]
526
+ )
527
  self.config.update(host=new_host)
528
  self.console.print(f"\n[green]✅ Host updated to: {new_host}[/green]")
529
  elif choice == "2":
530
+ # Show warning and require confirmation
531
+ confirmed = self.confirm_setting_change(
532
+ "Port",
533
+ [
534
+ "Changing the port will affect all applications currently configured",
535
+ "to connect to your proxy on the existing port.",
536
+ "",
537
+ "Applications using the old port will fail to connect.",
538
+ ],
539
+ )
540
+ if not confirmed:
541
+ continue
542
+
543
+ new_port = IntPrompt.ask(
544
+ "Enter new port", default=self.config.config["port"]
545
+ )
546
  if 1 <= new_port <= 65535:
547
  self.config.update(port=new_port)
548
+ self.console.print(
549
+ f"\n[green]✅ Port updated to: {new_port}[/green]"
550
+ )
551
  else:
552
  self.console.print("\n[red]❌ Port must be between 1-65535[/red]")
553
  elif choice == "3":
554
+ # Show warning and require confirmation
555
+ confirmed = self.confirm_setting_change(
556
+ "Proxy API Key",
557
+ [
558
+ "This is the authentication key that applications use to access your proxy.",
559
+ "",
560
+ "[bold red]⚠️ Changing this will BREAK all applications currently configured",
561
+ " with the existing API key![/bold red]",
562
+ "",
563
+ "[bold cyan]💡 If you want to add provider API keys (OpenAI, Gemini, etc.),",
564
+ ' go to "3. 🔑 Manage Credentials" in the main menu instead.[/bold cyan]',
565
+ ],
566
+ )
567
+ if not confirmed:
568
+ continue
569
+
570
  current = os.getenv("PROXY_API_KEY", "")
571
+ new_key = Prompt.ask(
572
+ "Enter new Proxy API Key (leave empty to disable authentication)",
573
+ default=current,
574
+ )
575
+
576
+ if new_key != current:
577
+ # If setting to empty, show additional warning
578
+ if not new_key:
579
+ self.console.print(
580
+ "\n[bold red]⚠️ Authentication will be DISABLED - anyone can access your proxy![/bold red]"
581
+ )
582
+ Prompt.ask("Press Enter to continue", default="")
583
+
584
  LauncherConfig.update_proxy_api_key(new_key)
585
+
586
+ if new_key:
587
+ self.console.print(
588
+ "\n[green]✅ Proxy API Key updated successfully![/green]"
589
+ )
590
+ self.console.print(" Updated in .env file")
591
+ else:
592
+ self.console.print(
593
+ "\n[yellow]⚠️ Proxy API Key cleared - authentication disabled![/yellow]"
594
+ )
595
+ self.console.print(" Updated in .env file")
596
  else:
597
  self.console.print("\n[yellow]No changes made[/yellow]")
598
  elif choice == "4":
599
  current = self.config.config["enable_request_logging"]
600
  self.config.update(enable_request_logging=not current)
601
+ self.console.print(
602
+ f"\n[green]✅ Request Logging {'enabled' if not current else 'disabled'}![/green]"
603
+ )
604
  elif choice == "5":
605
+ # Reset to Default Settings
606
+ # Define defaults
607
+ default_host = "127.0.0.1"
608
+ default_port = 8000
609
+ default_logging = False
610
+ default_api_key = "VerysecretKey"
611
+
612
+ # Get current values
613
+ current_host = self.config.config["host"]
614
+ current_port = self.config.config["port"]
615
+ current_logging = self.config.config["enable_request_logging"]
616
+ current_api_key = os.getenv("PROXY_API_KEY", "")
617
+
618
+ # Build comparison table
619
+ warning_lines = [
620
+ "This will reset ALL proxy settings to their defaults:",
621
+ "",
622
+ "[bold] Setting Current Value → Default Value[/bold]",
623
+ " " + "─" * 62,
624
+ f" Host IP {current_host:20} → {default_host}",
625
+ f" Port {str(current_port):20} → {default_port}",
626
+ f" Request Logging {'Enabled':20} → Disabled"
627
+ if current_logging
628
+ else f" Request Logging {'Disabled':20} → Disabled",
629
+ f" Proxy API Key {current_api_key[:20]:20} → {default_api_key}",
630
+ "",
631
+ "[bold red]⚠️ This may break applications configured with current settings![/bold red]",
632
+ ]
633
+
634
+ confirmed = self.confirm_setting_change(
635
+ "Settings (Reset to Defaults)", warning_lines
636
+ )
637
+ if not confirmed:
638
+ continue
639
+
640
+ # Apply defaults
641
+ self.config.update(
642
+ host=default_host,
643
+ port=default_port,
644
+ enable_request_logging=default_logging,
645
+ )
646
+ LauncherConfig.update_proxy_api_key(default_api_key)
647
+
648
+ self.console.print(
649
+ "\n[green]✅ All settings have been reset to defaults![/green]"
650
+ )
651
+ self.console.print(f" Host: {default_host}")
652
+ self.console.print(f" Port: {default_port}")
653
+ self.console.print(" Request Logging: Disabled")
654
+ self.console.print(f" Proxy API Key: {default_api_key}")
655
+ elif choice == "6":
656
  break
657
+
658
  def show_provider_settings_menu(self):
659
  """Display provider/advanced settings (read-only + launch tool)"""
660
  clear_screen()
661
+
662
  settings = SettingsDetector.get_all_settings()
663
  credentials = settings["credentials"]
664
  custom_bases = settings["custom_bases"]
 
666
  concurrency = settings["concurrency_limits"]
667
  filters = settings["model_filters"]
668
  provider_settings = settings.get("provider_settings", {})
669
+
670
+ self.console.print(
671
+ Panel.fit(
672
+ "[bold cyan]📊 Provider & Advanced Settings[/bold cyan]",
673
+ border_style="cyan",
674
+ )
675
+ )
676
+
677
  # Configured Providers
678
  self.console.print()
679
  self.console.print("[bold]📊 Configured Providers[/bold]")
 
683
  provider_name = provider.title()
684
  parts = []
685
  if info["api_keys"] > 0:
686
+ parts.append(
687
+ f"{info['api_keys']} API key{'s' if info['api_keys'] > 1 else ''}"
688
+ )
689
  if info["oauth"] > 0:
690
+ parts.append(
691
+ f"{info['oauth']} OAuth credential{'s' if info['oauth'] > 1 else ''}"
692
+ )
693
+
694
  display = " + ".join(parts)
695
  if info["custom"]:
696
  display += " (Custom)"
697
+
698
  self.console.print(f" ✅ {provider_name:20} {display}")
699
  else:
700
  self.console.print(" [dim]No providers configured[/dim]")
701
+
702
  # Custom API Bases
703
  if custom_bases:
704
  self.console.print()
 
706
  self.console.print("━" * 70)
707
  for provider, base in custom_bases.items():
708
  self.console.print(f" • {provider:15} {base}")
709
+
710
  # Model Definitions
711
  if model_defs:
712
  self.console.print()
713
  self.console.print("[bold]📦 Provider Model Definitions[/bold]")
714
  self.console.print("━" * 70)
715
  for provider, count in model_defs.items():
716
+ self.console.print(
717
+ f" • {provider:15} {count} model{'s' if count > 1 else ''} configured"
718
+ )
719
+
720
  # Concurrency Limits
721
  if concurrency:
722
  self.console.print()
 
725
  for provider, limit in concurrency.items():
726
  self.console.print(f" • {provider:15} {limit} requests/key")
727
  self.console.print(" • Default: 1 request/key (all others)")
728
+
729
  # Model Filters (basic info only)
730
  if filters:
731
  self.console.print()
 
739
  status_parts.append("Ignore list")
740
  status = " + ".join(status_parts) if status_parts else "None"
741
  self.console.print(f" • {provider:15} ✅ {status}")
742
+
743
  # Provider-Specific Settings
744
  self.console.print()
745
  self.console.print("[bold]🔬 Provider-Specific Settings[/bold]")
 
752
  display_name = provider.replace("_", " ").title()
753
  modified = provider_settings.get(provider, 0)
754
  if modified > 0:
755
+ self.console.print(
756
+ f" • {display_name:20} [yellow]{modified} setting{'s' if modified > 1 else ''} modified[/yellow]"
757
+ )
758
  else:
759
  self.console.print(f" • {display_name:20} [dim]using defaults[/dim]")
760
+
761
  # Actions
762
  self.console.print()
763
  self.console.print("━" * 70)
764
  self.console.print()
765
  self.console.print("[bold]💡 Actions[/bold]")
766
  self.console.print()
767
+ self.console.print(
768
+ " 1. 🔧 Launch Settings Tool (configure advanced settings)"
769
+ )
770
  self.console.print(" 2. ↩️ Back to Main Menu")
771
+
772
  self.console.print()
773
  self.console.print("━" * 70)
774
+ self.console.print(
775
+ "[dim]ℹ️ Advanced settings are stored in .env file.\n Use the Settings Tool to configure them interactively.[/dim]"
776
+ )
777
  self.console.print()
778
+ self.console.print(
779
+ "[dim]⚠️ Note: Settings Tool supports only common configuration types.\n For complex settings, edit .env directly.[/dim]"
780
+ )
781
  self.console.print()
782
+
783
  choice = Prompt.ask("Select option", choices=["1", "2"], show_choices=False)
784
+
785
  if choice == "1":
786
  self.launch_settings_tool()
787
  # choice == "2" returns to main menu
788
+
789
  def launch_credential_tool(self):
790
  """Launch credential management tool"""
791
  import time
792
+
793
  # CRITICAL: Show full loading UI to replace the 6-7 second blank wait
794
  clear_screen()
795
+
796
  _start_time = time.time()
797
+
798
  # Show the same header as standalone mode
799
  self.console.print("━" * 70)
800
  self.console.print("Interactive Credential Setup Tool")
801
  self.console.print("GitHub: https://github.com/Mirrowel/LLM-API-Key-Proxy")
802
  self.console.print("━" * 70)
803
  self.console.print("Loading credential management components...")
804
+
805
  # Now import with spinner (this is where the 6-7 second delay happens)
806
  with self.console.status("Initializing credential tool...", spinner="dots"):
807
+ from rotator_library.credential_tool import (
808
+ run_credential_tool,
809
+ _ensure_providers_loaded,
810
+ )
811
+
812
  _, PROVIDER_PLUGINS = _ensure_providers_loaded()
813
  self.console.print("✓ Credential tool initialized")
814
 
815
  _elapsed = time.time() - _start_time
816
+ self.console.print(
817
+ f"✓ Tool ready in {_elapsed:.2f}s ({len(PROVIDER_PLUGINS)} providers available)"
818
+ )
819
+
820
  # Small delay to let user see the ready message
821
  time.sleep(0.5)
822
+
823
  # Run the tool with from_launcher=True to skip duplicate loading screen
824
  run_credential_tool(from_launcher=True)
825
  # Reload environment after credential tool
826
  load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
827
+
828
  def launch_settings_tool(self):
829
  """Launch settings configuration tool"""
830
  from proxy_app.settings_tool import run_settings_tool
831
+
832
  run_settings_tool()
833
  # Reload environment after settings tool
834
  load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
835
+
836
  def show_about(self):
837
  """Display About page with project information"""
838
  clear_screen()
839
+
840
+ self.console.print(
841
+ Panel.fit(
842
+ "[bold cyan]ℹ️ About LLM API Key Proxy[/bold cyan]", border_style="cyan"
843
+ )
844
+ )
845
+
846
  self.console.print()
847
  self.console.print("[bold]📦 Project Information[/bold]")
848
  self.console.print("━" * 70)
849
  self.console.print(" [bold cyan]LLM API Key Proxy[/bold cyan]")
850
+ self.console.print(
851
+ " A lightweight, high-performance proxy server for managing"
852
+ )
853
  self.console.print(" LLM API keys with automatic rotation and OAuth support")
854
  self.console.print()
855
+ self.console.print(
856
+ " [dim]GitHub:[/dim] [blue underline]https://github.com/Mirrowel/LLM-API-Key-Proxy[/blue underline]"
857
+ )
858
+
859
  self.console.print()
860
  self.console.print("[bold]✨ Key Features[/bold]")
861
  self.console.print("━" * 70)
862
+ self.console.print(
863
+ " • [green]Smart Key Rotation[/green] - Automatic rotation across multiple API keys"
864
+ )
865
+ self.console.print(
866
+ " • [green]OAuth Support[/green] - Automated OAuth flows for supported providers"
867
+ )
868
+ self.console.print(
869
+ " • [green]Multiple Providers[/green] - Support for 10+ LLM providers"
870
+ )
871
+ self.console.print(
872
+ " • [green]Custom Providers[/green] - Easy integration of custom OpenAI-compatible APIs"
873
+ )
874
+ self.console.print(
875
+ " • [green]Advanced Filtering[/green] - Model whitelists and ignore lists per provider"
876
+ )
877
+ self.console.print(
878
+ " • [green]Concurrency Control[/green] - Per-key rate limiting and request management"
879
+ )
880
+ self.console.print(
881
+ " • [green]Cost Tracking[/green] - Track usage and costs across all providers"
882
+ )
883
+ self.console.print(
884
+ " • [green]Interactive TUI[/green] - Beautiful terminal interface for easy configuration"
885
+ )
886
+
887
  self.console.print()
888
  self.console.print("[bold]📝 License & Credits[/bold]")
889
  self.console.print("━" * 70)
890
  self.console.print(" Made with ❤️ by the community")
891
  self.console.print(" Open source - contributions welcome!")
892
+
893
  self.console.print()
894
  self.console.print("━" * 70)
895
  self.console.print()
896
+
897
  Prompt.ask("Press Enter to return to main menu", default="")
898
+
899
  def run_proxy(self):
900
  """Prepare and launch proxy in same window"""
901
  # Check if forced onboarding needed
902
  if self.needs_onboarding():
903
  clear_screen()
904
+ self.console.print(
905
+ Panel(
906
+ Text.from_markup(
907
+ "⚠️ [bold yellow]Setup Required[/bold yellow]\n\n"
908
+ "Cannot start without .env.\n"
909
+ "Launching credential tool..."
910
+ ),
911
+ border_style="yellow",
912
+ )
913
+ )
914
+
915
  # Force credential tool
916
+ from rotator_library.credential_tool import (
917
+ ensure_env_defaults,
918
+ run_credential_tool,
919
+ )
920
+
921
  ensure_env_defaults()
922
  load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
923
  run_credential_tool()
924
  load_dotenv(dotenv_path=Path.cwd() / ".env", override=True)
925
+
926
  # Check again after credential tool
927
  if not os.getenv("PROXY_API_KEY"):
928
+ self.console.print(
929
+ "\n[red]❌ PROXY_API_KEY still not set. Cannot start proxy.[/red]"
930
+ )
931
  return
932
+
933
  # Clear console and modify sys.argv
934
  clear_screen()
935
+ self.console.print(
936
+ f"\n[bold green]🚀 Starting proxy on {self.config.config['host']}:{self.config.config['port']}...[/bold green]\n"
937
+ )
938
+
939
  # Clear console again to remove the starting message before main.py shows loading details
940
  import time
941
+
942
  time.sleep(0.5) # Brief pause so user sees the message
943
  clear_screen()
944
+
945
  # Reconstruct sys.argv for main.py
946
  sys.argv = [
947
  "main.py",
948
+ "--host",
949
+ self.config.config["host"],
950
+ "--port",
951
+ str(self.config.config["port"]),
952
  ]
953
  if self.config.config["enable_request_logging"]:
954
  sys.argv.append("--enable-request-logging")
955
+
956
  # Exit TUI - main.py will continue execution
957
  self.running = False
958
 
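Once the launcher hands control to the proxy, client applications authenticate with the `PROXY_API_KEY` the TUI warns about above. A minimal sketch of how a client might assemble such a request — assuming the proxy exposes an OpenAI-compatible `/v1/chat/completions` endpoint on the default `127.0.0.1:8000`; the `build_chat_request` helper and the model name are illustrative, not part of this codebase:

```python
import json

def build_chat_request(api_key: str, model: str, content: str,
                       host: str = "127.0.0.1", port: int = 8000):
    """Assemble URL, headers, and body for an OpenAI-compatible proxy call."""
    url = f"http://{host}:{port}/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",  # the PROXY_API_KEY from .env
        "Content-Type": "application/json",
    }
    body = json.dumps({
        "model": model,
        "messages": [{"role": "user", "content": content}],
    })
    return url, headers, body

url, headers, body = build_chat_request(
    "VerysecretKey", "gemini/gemini-2.0-flash", "ping"
)
```

Any HTTP client can then POST `body` to `url` with `headers`; with no `PROXY_API_KEY` set, the proxy accepts unauthenticated requests, which is exactly the risk the red warning panel flags.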
src/proxy_app/main.py CHANGED
@@ -10,10 +10,18 @@ import logging
10
 
11
  # --- Argument Parsing (BEFORE heavy imports) ---
12
  parser = argparse.ArgumentParser(description="API Key Proxy Server")
13
- parser.add_argument("--host", type=str, default="0.0.0.0", help="Host to bind the server to.")
 
 
14
  parser.add_argument("--port", type=int, default=8000, help="Port to run the server on.")
15
- parser.add_argument("--enable-request-logging", action="store_true", help="Enable request logging.")
16
- parser.add_argument("--add-credential", action="store_true", help="Launch the interactive tool to add a new OAuth credential.")
 
 
 
 
 
 
17
  args, _ = parser.parse_known_args()
18
 
19
  # Add the 'src' directory to the Python path
@@ -23,6 +31,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent))
23
  if len(sys.argv) == 1:
24
  # TUI MODE - Load ONLY what's needed for the launcher (fast path!)
25
  from proxy_app.launcher_tui import run_launcher_tui
 
26
  run_launcher_tui()
27
  # Launcher modifies sys.argv and returns, or exits if user chose Exit
28
  # If we get here, user chose "Run Proxy" and sys.argv is modified
@@ -32,6 +41,7 @@ if len(sys.argv) == 1:
32
  # Check if credential tool mode (also doesn't need heavy proxy imports)
33
  if args.add_credential:
34
  from rotator_library.credential_tool import run_credential_tool
 
35
  run_credential_tool()
36
  sys.exit(0)
37
 
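The hunks above depend on parsing CLI flags before any heavy imports, so that bare `python main.py` can drop into TUI mode quickly and unknown flags don't abort startup. The pattern — `parse_known_args` tolerating extra arguments — can be exercised in isolation (flag names mirror the diff; this is a sketch, not the actual module):

```python
import argparse

parser = argparse.ArgumentParser(description="API Key Proxy Server")
parser.add_argument("--host", type=str, default="0.0.0.0")
parser.add_argument("--port", type=int, default=8000)
parser.add_argument("--enable-request-logging", action="store_true")

# parse_known_args returns unrecognized flags instead of exiting with an error,
# so flags meant for later subsystems pass through harmlessly
args, unknown = parser.parse_known_args(["--port", "9000", "--future-flag"])
```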
@@ -74,6 +84,7 @@ print("Loading server components...")
74
 
75
  # Phase 2: Load Rich for loading spinner (lightweight)
76
  from rich.console import Console
 
77
  _console = Console()
78
 
79
  # Phase 3: Heavy dependencies with granular loading messages
@@ -92,7 +103,7 @@ with _console.status("[dim]Loading core dependencies...", spinner="dots"):
92
  import json
93
  from typing import AsyncGenerator, Any, List, Optional, Union
94
  from pydantic import BaseModel, Field
95
-
96
  # --- Early Log Level Configuration ---
97
  logging.getLogger("LiteLLM").setLevel(logging.WARNING)
98
 
@@ -100,7 +111,7 @@ print(" → Loading LiteLLM library...")
100
  with _console.status("[dim]Loading LiteLLM library...", spinner="dots"):
101
  import litellm
102
 
103
- # Phase 4: Application imports with granular loading messages
104
  print(" → Initializing proxy core...")
105
  with _console.status("[dim]Initializing proxy core...", spinner="dots"):
106
  from rotator_library import RotatingClient
@@ -115,12 +126,15 @@ print(" → Discovering provider plugins...")
115
  # Provider lazy loading happens during import, so time it here
116
  _provider_start = time.time()
117
  with _console.status("[dim]Discovering provider plugins...", spinner="dots"):
118
- from rotator_library import PROVIDER_PLUGINS # This triggers lazy load via __getattr__
 
 
119
  _provider_time = time.time() - _provider_start
120
 
121
  # Get count after import (without timing to avoid double-counting)
122
  _plugin_count = len(PROVIDER_PLUGINS)
123
 
 
124
  # --- Pydantic Models ---
125
  class EmbeddingRequest(BaseModel):
126
  model: str
@@ -129,15 +143,19 @@ class EmbeddingRequest(BaseModel):
129
  dimensions: Optional[int] = None
130
  user: Optional[str] = None
131
 
 
132
  class ModelCard(BaseModel):
133
  """Basic model card for minimal response."""
 
134
  id: str
135
  object: str = "model"
136
  created: int = Field(default_factory=lambda: int(time.time()))
137
  owned_by: str = "Mirro-Proxy"
138
 
 
139
  class ModelCapabilities(BaseModel):
140
  """Model capability flags."""
 
141
  tool_choice: bool = False
142
  function_calling: bool = False
143
  reasoning: bool = False
@@ -146,8 +164,10 @@ class ModelCapabilities(BaseModel):
146
  prompt_caching: bool = False
147
  assistant_prefill: bool = False
148
 
 
149
  class EnrichedModelCard(BaseModel):
150
  """Extended model card with pricing and capabilities."""
 
151
  id: str
152
  object: str = "model"
153
  created: int = Field(default_factory=lambda: int(time.time()))
@@ -169,28 +189,36 @@ class EnrichedModelCard(BaseModel):
169
  # Debug info (optional)
170
  _sources: Optional[List[str]] = None
171
  _match_type: Optional[str] = None
172
-
173
  class Config:
174
  extra = "allow" # Allow extra fields from the service
175
 
 
176
  class ModelList(BaseModel):
177
  """List of models response."""
 
178
  object: str = "list"
179
  data: List[ModelCard]
180
 
 
181
  class EnrichedModelList(BaseModel):
182
  """List of enriched models with pricing and capabilities."""
 
183
  object: str = "list"
184
  data: List[EnrichedModelCard]
185
 
 
186
  # Calculate total loading time
187
  _elapsed = time.time() - _start_time
188
- print(f"✓ Server ready in {_elapsed:.2f}s ({_plugin_count} providers discovered in {_provider_time:.2f}s)")
 
 
189
 
190
  # Clear screen and reprint header for clean startup view
191
  # This pushes loading messages up (still in scroll history) but shows a clean final screen
192
  import os as _os_module
193
- _os_module.system('cls' if _os_module.name == 'nt' else 'clear')
 
194
 
195
  # Reprint header
196
  print("━" * 70)
@@ -198,7 +226,9 @@ print(f"Starting proxy on {args.host}:{args.port}")
198
  print(f"Proxy API Key: {key_display}")
199
  print(f"GitHub: https://github.com/Mirrowel/LLM-API-Key-Proxy")
200
  print("━" * 70)
201
- print(f"✓ Server ready in {_elapsed:.2f}s ({_plugin_count} providers discovered in {_provider_time:.2f}s)")
 
 
202
 
203
 
204
  # Note: Debug logging will be added after logging configuration below
@@ -211,52 +241,64 @@ LOG_DIR.mkdir(exist_ok=True)
211
  console_handler = colorlog.StreamHandler(sys.stdout)
212
  console_handler.setLevel(logging.INFO)
213
  formatter = colorlog.ColoredFormatter(
214
- '%(log_color)s%(message)s',
215
  log_colors={
216
- 'DEBUG': 'cyan',
217
- 'INFO': 'green',
218
- 'WARNING': 'yellow',
219
- 'ERROR': 'red',
220
- 'CRITICAL': 'red,bg_white',
221
- }
222
  )
223
  console_handler.setFormatter(formatter)
224
 
225
  # Configure a file handler for INFO-level logs and higher
226
  info_file_handler = logging.FileHandler(LOG_DIR / "proxy.log", encoding="utf-8")
227
  info_file_handler.setLevel(logging.INFO)
228
- info_file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
 
 
229
 
230
  # Configure a dedicated file handler for all DEBUG-level logs
231
  debug_file_handler = logging.FileHandler(LOG_DIR / "proxy_debug.log", encoding="utf-8")
232
  debug_file_handler.setLevel(logging.DEBUG)
233
- debug_file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
 
 
 
234
 
235
  # Create a filter to ensure the debug handler ONLY gets DEBUG messages from the rotator_library
236
  class RotatorDebugFilter(logging.Filter):
237
  def filter(self, record):
238
- return record.levelno == logging.DEBUG and record.name.startswith('rotator_library')
 
 
 
 
239
  debug_file_handler.addFilter(RotatorDebugFilter())
240
 
241
  # Configure a console handler with color
242
  console_handler = colorlog.StreamHandler(sys.stdout)
243
  console_handler.setLevel(logging.INFO)
244
  formatter = colorlog.ColoredFormatter(
245
- '%(log_color)s%(message)s',
246
  log_colors={
247
- 'DEBUG': 'cyan',
248
- 'INFO': 'green',
249
- 'WARNING': 'yellow',
250
- 'ERROR': 'red',
251
- 'CRITICAL': 'red,bg_white',
252
- }
253
  )
254
  console_handler.setFormatter(formatter)
255
 
 
256
  # Add a filter to prevent any LiteLLM logs from cluttering the console
257
  class NoLiteLLMLogFilter(logging.Filter):
258
  def filter(self, record):
259
- return not record.name.startswith('LiteLLM')
 
 
260
  console_handler.addFilter(NoLiteLLMLogFilter())
261
 
262
  # Get the root logger and set it to DEBUG to capture all messages
@@ -306,18 +348,26 @@ ignore_models = {}
306
  for key, value in os.environ.items():
307
  if key.startswith("IGNORE_MODELS_"):
308
  provider = key.replace("IGNORE_MODELS_", "").lower()
309
- models_to_ignore = [model.strip() for model in value.split(',') if model.strip()]
 
 
310
  ignore_models[provider] = models_to_ignore
311
- logging.debug(f"Loaded ignore list for provider '{provider}': {models_to_ignore}")
 
 
312
 
313
  # Load model whitelist from environment variables
314
  whitelist_models = {}
315
  for key, value in os.environ.items():
316
  if key.startswith("WHITELIST_MODELS_"):
317
  provider = key.replace("WHITELIST_MODELS_", "").lower()
318
- models_to_whitelist = [model.strip() for model in value.split(',') if model.strip()]
 
 
319
  whitelist_models[provider] = models_to_whitelist
320
- logging.debug(f"Loaded whitelist for provider '{provider}': {models_to_whitelist}")
 
 
321
 
322
  # Load max concurrent requests per key from environment variables
323
  max_concurrent_requests_per_key = {}
@@ -327,12 +377,19 @@ for key, value in os.environ.items():
327
  try:
328
  max_concurrent = int(value)
329
  if max_concurrent < 1:
330
- logging.warning(f"Invalid max_concurrent value for provider '{provider}': {value}. Must be >= 1. Using default (1).")
 
 
331
  max_concurrent = 1
332
  max_concurrent_requests_per_key[provider] = max_concurrent
333
- logging.debug(f"Loaded max concurrent requests for provider '{provider}': {max_concurrent}")
 
 
334
  except ValueError:
335
- logging.warning(f"Invalid max_concurrent value for provider '{provider}': {value}. Using default (1).")
 
 
 
336
 
337
  # --- Lifespan Management ---
338
  @asynccontextmanager
@@ -349,11 +406,11 @@ async def lifespan(app: FastAPI):
349
  if not skip_oauth_init and oauth_credentials:
350
  logging.info("Starting OAuth credential validation and deduplication...")
351
  processed_emails = {} # email -> {provider: path}
352
- credentials_to_initialize = {} # provider -> [paths]
353
  final_oauth_credentials = {}
354
 
355
  # --- Pass 1: Pre-initialization Scan & Deduplication ---
356
- #logging.info("Pass 1: Scanning for existing metadata to find duplicates...")
357
  for provider, paths in oauth_credentials.items():
358
  if provider not in credentials_to_initialize:
359
  credentials_to_initialize[provider] = []
@@ -362,9 +419,9 @@ async def lifespan(app: FastAPI):
362
  if path.startswith("env://"):
363
  credentials_to_initialize[provider].append(path)
364
  continue
365
-
366
  try:
367
- with open(path, 'r') as f:
368
  data = json.load(f)
369
  metadata = data.get("_proxy_metadata", {})
370
  email = metadata.get("email")
@@ -372,28 +429,32 @@ async def lifespan(app: FastAPI):
372
  if email:
373
  if email not in processed_emails:
374
  processed_emails[email] = {}
375
-
376
  if provider in processed_emails[email]:
377
  original_path = processed_emails[email][provider]
378
- logging.warning(f"Duplicate for '{email}' on '{provider}' found in pre-scan: '{Path(path).name}'. Original: '{Path(original_path).name}'. Skipping.")
 
 
379
  continue
380
  else:
381
  processed_emails[email][provider] = path
382
-
383
  credentials_to_initialize[provider].append(path)
384
 
385
  except (FileNotFoundError, json.JSONDecodeError) as e:
386
- logging.warning(f"Could not pre-read metadata from '{path}': {e}. Will process during initialization.")
 
 
387
  credentials_to_initialize[provider].append(path)
388
-
389
  # --- Pass 2: Parallel Initialization of Filtered Credentials ---
390
- #logging.info("Pass 2: Initializing unique credentials and performing final check...")
391
  async def process_credential(provider: str, path: str, provider_instance):
392
  """Process a single credential: initialize and fetch user info."""
393
  try:
394
  await provider_instance.initialize_token(path)
395
 
396
- if not hasattr(provider_instance, 'get_user_info'):
397
  return (provider, path, None, None)
398
 
399
  user_info = await provider_instance.get_user_info(path)
@@ -401,7 +462,9 @@ async def lifespan(app: FastAPI):
401
  return (provider, path, email, None)
402
 
403
  except Exception as e:
404
- logging.error(f"Failed to process OAuth token for {provider} at '{path}': {e}")
 
 
405
  return (provider, path, None, e)
406
 
407
  # Collect all tasks for parallel execution
@@ -413,9 +476,9 @@ async def lifespan(app: FastAPI):
 provider_plugin_class = PROVIDER_PLUGINS.get(provider)
 if not provider_plugin_class:
 continue
-
 provider_instance = provider_plugin_class()
-
 for path in paths:
 tasks.append(process_credential(provider, path, provider_instance))
 
@@ -430,7 +493,7 @@ async def lifespan(app: FastAPI):
 continue
 
 provider, path, email, error = result
-
 # Skip if there was an error
 if error:
 continue
@@ -444,7 +507,9 @@ async def lifespan(app: FastAPI):
 
 # Handle empty email
 if not email:
- logging.warning(f"Could not retrieve email for '{path}'. Treating as unique.")
 if provider not in final_oauth_credentials:
 final_oauth_credentials[provider] = []
 final_oauth_credentials[provider].append(path)
@@ -453,10 +518,15 @@ async def lifespan(app: FastAPI):
 # Deduplication check
 if email not in processed_emails:
 processed_emails[email] = {}
-
- if provider in processed_emails[email] and processed_emails[email][provider] != path:
 original_path = processed_emails[email][provider]
- logging.warning(f"Duplicate for '{email}' on '{provider}' found post-init: '{Path(path).name}'. Original: '{Path(original_path).name}'. Skipping.")
 continue
 else:
 processed_emails[email][provider] = path
@@ -467,7 +537,7 @@ async def lifespan(app: FastAPI):
 # Update metadata (skip for env-based credentials - they don't have files)
 if not path.startswith("env://"):
 try:
- with open(path, 'r+') as f:
 data = json.load(f)
 metadata = data.get("_proxy_metadata", {})
 metadata["email"] = email
@@ -490,15 +560,15 @@ async def lifespan(app: FastAPI):
 # The client now uses the root logger configuration
 client = RotatingClient(
 api_keys=api_keys,
- oauth_credentials=oauth_credentials, # Pass OAuth config
 configure_logging=True,
 litellm_provider_params=litellm_provider_params,
 ignore_models=ignore_models,
 whitelist_models=whitelist_models,
 enable_request_logging=ENABLE_REQUEST_LOGGING,
- max_concurrent_requests_per_key=max_concurrent_requests_per_key
 )
-
 # Log loaded credentials summary (compact, always visible for deployment verification)
 #_api_summary = ', '.join([f"{p}:{len(c)}" for p, c in api_keys.items()]) if api_keys else "none"
 #_oauth_summary = ', '.join([f"{p}:{len(c)}" for p, c in oauth_credentials.items()]) if oauth_credentials else "none"
@@ -506,17 +576,19 @@ async def lifespan(app: FastAPI):
 #print(f"🔑 Credentials loaded: {_total_summary} (API: {_api_summary} | OAuth: {_oauth_summary})")
 client.background_refresher.start() # Start the background task
 app.state.rotating_client = client
-
 # Warn if no provider credentials are configured
 if not client.all_credentials:
 logging.warning("=" * 70)
 logging.warning("⚠️ NO PROVIDER CREDENTIALS CONFIGURED")
 logging.warning("The proxy is running but cannot serve any LLM requests.")
- logging.warning("Launch the credential tool to add API keys or OAuth credentials.")
 logging.warning(" • Executable: Run with --add-credential flag")
 logging.warning(" • Source: python src/proxy_app/main.py --add-credential")
 logging.warning("=" * 70)
-
 os.environ["LITELLM_LOG"] = "ERROR"
 litellm.set_verbose = False
 litellm.drop_params = True
@@ -527,29 +599,30 @@ async def lifespan(app: FastAPI):
 else:
 app.state.embedding_batcher = None
 logging.info("RotatingClient initialized (EmbeddingBatcher disabled).")
-
 # Start model info service in background (fetches pricing/capabilities data)
 # This runs asynchronously and doesn't block proxy startup
 model_info_service = await init_model_info_service()
 app.state.model_info_service = model_info_service
 logging.info("Model info service started (fetching pricing data in background).")
-
 yield
-
- await client.background_refresher.stop() # Stop the background task on shutdown
 if app.state.embedding_batcher:
 await app.state.embedding_batcher.stop()
 await client.close()
-
 # Stop model info service
- if hasattr(app.state, 'model_info_service') and app.state.model_info_service:
 await app.state.model_info_service.stop()
-
 if app.state.embedding_batcher:
 logging.info("RotatingClient and EmbeddingBatcher closed.")
 else:
 logging.info("RotatingClient closed.")
 
 # --- FastAPI App Setup ---
 app = FastAPI(lifespan=lifespan)
 
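The two deduplication passes in the lifespan hook above reduce to a first-wins map keyed on (email, provider). A minimal standalone sketch of that policy (hypothetical helper, not code from this repo):

```python
# First-wins dedup over (email, provider) pairs, mirroring the pre-scan
# and post-init checks in the lifespan hook. Hypothetical helper.
def dedupe_credentials(entries):
    """entries: iterable of (email, provider, path) tuples."""
    processed = {}  # email -> {provider: path}
    kept = {}       # provider -> [paths]
    for email, provider, path in entries:
        slots = processed.setdefault(email, {})
        if provider in slots:
            continue  # duplicate account for this provider: skip the file
        slots[provider] = path
        kept.setdefault(provider, []).append(path)
    return kept

paths = dedupe_credentials([
    ("a@example.com", "gemini", "cred1.json"),
    ("a@example.com", "gemini", "cred2.json"),  # duplicate, skipped
    ("b@example.com", "gemini", "cred3.json"),
])
```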
@@ -563,25 +636,32 @@ app.add_middleware(
 )
 api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
 
 def get_rotating_client(request: Request) -> RotatingClient:
 """Dependency to get the rotating client instance from the app state."""
 return request.app.state.rotating_client
 
 def get_embedding_batcher(request: Request) -> EmbeddingBatcher:
 """Dependency to get the embedding batcher instance from the app state."""
 return request.app.state.embedding_batcher
 
 async def verify_api_key(auth: str = Depends(api_key_header)):
 """Dependency to verify the proxy API key."""
 if not auth or auth != f"Bearer {PROXY_API_KEY}":
 raise HTTPException(status_code=401, detail="Invalid or missing API Key")
 return auth
 
 async def streaming_response_wrapper(
 request: Request,
 request_data: dict,
 response_stream: AsyncGenerator[str, None],
- logger: Optional[DetailedLogger] = None
 ) -> AsyncGenerator[str, None]:
 """
 Wraps a streaming response to log the full response after completion
@@ -589,7 +669,7 @@ async def streaming_response_wrapper(
 """
 response_chunks = []
 full_response = {}
-
 try:
 async for chunk_str in response_stream:
 if await request.is_disconnected():
@@ -597,7 +677,7 @@ async def streaming_response_wrapper(
 break
 yield chunk_str
 if chunk_str.strip() and chunk_str.startswith("data:"):
- content = chunk_str[len("data:"):].strip()
 if content != "[DONE]":
 try:
 chunk_data = json.loads(content)
@@ -613,15 +693,17 @@ async def streaming_response_wrapper(
 "error": {
 "message": f"An unexpected error occurred during the stream: {str(e)}",
 "type": "proxy_internal_error",
- "code": 500
 }
 }
 yield f"data: {json.dumps(error_payload)}\n\n"
 yield "data: [DONE]\n\n"
 # Also log this as a failed request
 if logger:
- logger.log_final_response(status_code=500, headers=None, body={"error": str(e)})
- return # Stop further processing
 finally:
 if response_chunks:
 # --- Aggregation Logic ---
@@ -645,36 +727,56 @@ async def streaming_response_wrapper(
 final_message["content"] = ""
 if value:
 final_message["content"] += value
-
 elif key == "tool_calls":
 for tc_chunk in value:
 index = tc_chunk["index"]
 if index not in aggregated_tool_calls:
- aggregated_tool_calls[index] = {"type": "function", "function": {"name": "", "arguments": ""}}
 # Ensure 'function' key exists for this index before accessing its sub-keys
 if "function" not in aggregated_tool_calls[index]:
- aggregated_tool_calls[index]["function"] = {"name": "", "arguments": ""}
 if tc_chunk.get("id"):
 aggregated_tool_calls[index]["id"] = tc_chunk["id"]
 if "function" in tc_chunk:
 if "name" in tc_chunk["function"]:
 if tc_chunk["function"]["name"] is not None:
- aggregated_tool_calls[index]["function"]["name"] += tc_chunk["function"]["name"]
 if "arguments" in tc_chunk["function"]:
- if tc_chunk["function"]["arguments"] is not None:
- aggregated_tool_calls[index]["function"]["arguments"] += tc_chunk["function"]["arguments"]
-
 elif key == "function_call":
 if "function_call" not in final_message:
- final_message["function_call"] = {"name": "", "arguments": ""}
 if "name" in value:
 if value["name"] is not None:
- final_message["function_call"]["name"] += value["name"]
 if "arguments" in value:
 if value["arguments"] is not None:
- final_message["function_call"]["arguments"] += value["arguments"]
-
- else: # Generic key handling for other data like 'reasoning'
 # FIX: Role should always replace, never concatenate
 if key == "role":
 final_message[key] = value
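The aggregation branch above reassembles OpenAI-style stream deltas, where tool-call fragments arrive keyed by `index` and `name`/`arguments` are split across chunks. A self-contained sketch of the same accumulation (illustrative only, not the wrapper's actual code):

```python
# Accumulate streamed tool-call deltas into complete tool calls.
# Fragments share an "index"; string fields are concatenated in arrival order.
def aggregate_tool_calls(chunks):
    calls = {}
    for tc in chunks:
        entry = calls.setdefault(
            tc["index"],
            {"type": "function", "function": {"name": "", "arguments": ""}},
        )
        if tc.get("id"):
            entry["id"] = tc["id"]  # id usually arrives on the first fragment
        fn = tc.get("function", {})
        if fn.get("name"):
            entry["function"]["name"] += fn["name"]
        if fn.get("arguments"):
            entry["function"]["arguments"] += fn["arguments"]
    return calls

calls = aggregate_tool_calls([
    {"index": 0, "id": "call_1", "function": {"name": "get_weather", "arguments": '{"ci'}},
    {"index": 0, "function": {"arguments": 'ty": "Oslo"}'}},
])
```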
@@ -707,7 +809,7 @@ async def streaming_response_wrapper(
 final_choice = {
 "index": 0,
 "message": final_message,
- "finish_reason": finish_reason
 }
 
 full_response = {
@@ -716,21 +818,22 @@ async def streaming_response_wrapper(
 "created": first_chunk.get("created"),
 "model": first_chunk.get("model"),
 "choices": [final_choice],
- "usage": usage_data
 }
 
 if logger:
 logger.log_final_response(
 status_code=200,
 headers=None, # Headers are not available at this stage
- body=full_response
 )
 
 @app.post("/v1/chat/completions")
 async def chat_completions(
 request: Request,
 client: RotatingClient = Depends(get_rotating_client),
- _ = Depends(verify_api_key)
 ):
 """
 OpenAI-compatible endpoint powered by the RotatingClient.
@@ -749,16 +852,24 @@ async def chat_completions(
 # instead of actual schemas, which can cause tool hallucination
 # Modes: "remove" = delete temperature key, "set" = change to 1.0, "false" = disabled
 override_temp_zero = os.getenv("OVERRIDE_TEMPERATURE_ZERO", "false").lower()
-
- if override_temp_zero in ("remove", "set", "true", "1", "yes") and "temperature" in request_data and request_data["temperature"] == 0:
 if override_temp_zero == "remove":
 # Remove temperature key entirely
 del request_data["temperature"]
- logging.debug("OVERRIDE_TEMPERATURE_ZERO=remove: Removed temperature=0 from request")
 else:
 # Set to 1.0 (for "set", "true", "1", "yes")
 request_data["temperature"] = 1.0
- logging.debug("OVERRIDE_TEMPERATURE_ZERO=set: Converting temperature=0 to temperature=1.0")
 
 # If logging is enabled, perform all logging operations using the parsed data.
 if logger:
@@ -766,9 +877,17 @@ async def chat_completions(
 
 # Extract and log specific reasoning parameters for monitoring.
 model = request_data.get("model")
- generation_cfg = request_data.get("generationConfig", {}) or request_data.get("generation_config", {}) or {}
- reasoning_effort = request_data.get("reasoning_effort") or generation_cfg.get("reasoning_effort")
- custom_reasoning_budget = request_data.get("custom_reasoning_budget") or generation_cfg.get("custom_reasoning_budget", False)
 
 logging.getLogger("rotator_library").debug(
 f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
@@ -779,31 +898,41 @@ async def chat_completions(
 url=str(request.url),
 headers=dict(request.headers),
 client_info=(request.client.host, request.client.port),
- request_data=request_data
 )
 is_streaming = request_data.get("stream", False)
 
 if is_streaming:
 response_generator = client.acompletion(request=request, **request_data)
 return StreamingResponse(
- streaming_response_wrapper(request, request_data, response_generator, logger),
- media_type="text/event-stream"
 )
 else:
 response = await client.acompletion(request=request, **request_data)
 if logger:
 # Assuming response has status_code and headers attributes
 # This might need adjustment based on the actual response object
- response_headers = response.headers if hasattr(response, 'headers') else None
- status_code = response.status_code if hasattr(response, 'status_code') else 200
 logger.log_final_response(
 status_code=status_code,
 headers=response_headers,
- body=response.model_dump()
 )
 return response
 
- except (litellm.InvalidRequestError, ValueError, litellm.ContextWindowExceededError) as e:
 raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
 except litellm.AuthenticationError as e:
 raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
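The `except` ladder above maps provider-library errors onto HTTP statuses: malformed requests become 400, bad credentials 401, and anything unexpected 500. A condensed sketch of that mapping, with stand-in exception classes in place of the litellm types (illustrative, not the endpoint's actual handler):

```python
# Stand-ins for the provider-library exception types (hypothetical names).
class InvalidRequestError(Exception):
    pass

class AuthenticationError(Exception):
    pass

class RateLimitError(Exception):
    pass

def status_for(exc):
    # Client mistakes -> 400, bad credentials -> 401, quota -> 429, else 500.
    if isinstance(exc, (InvalidRequestError, ValueError)):
        return 400
    if isinstance(exc, AuthenticationError):
        return 401
    if isinstance(exc, RateLimitError):
        return 429
    return 500
```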
@@ -824,16 +953,19 @@ async def chat_completions(
 except json.JSONDecodeError:
 request_data = {"error": "Could not parse request body"}
 if logger:
- logger.log_final_response(status_code=500, headers=None, body={"error": str(e)})
 raise HTTPException(status_code=500, detail=str(e))
 
 @app.post("/v1/embeddings")
 async def embeddings(
 request: Request,
 body: EmbeddingRequest,
 client: RotatingClient = Depends(get_rotating_client),
 batcher: Optional[EmbeddingBatcher] = Depends(get_embedding_batcher),
- _ = Depends(verify_api_key)
 ):
 """
 OpenAI-compatible endpoint for creating embeddings.
@@ -847,7 +979,7 @@ async def embeddings(
 url=str(request.url),
 headers=dict(request.headers),
 client_info=(request.client.host, request.client.port),
- request_data=request_data
 )
 if USE_EMBEDDING_BATCHER and batcher:
 # --- Server-Side Batching Logic ---
@@ -861,7 +993,7 @@ async def embeddings(
 individual_request = request_data.copy()
 individual_request["input"] = single_input
 tasks.append(batcher.add_request(individual_request))
-
 results = await asyncio.gather(*tasks)
 
 all_data = []
@@ -877,16 +1009,19 @@ async def embeddings(
 "object": "list",
 "model": results[0]["model"],
 "data": all_data,
- "usage": { "prompt_tokens": total_prompt_tokens, "total_tokens": total_tokens },
 }
 response = litellm.EmbeddingResponse(**final_response_data)
-
 else:
 # --- Direct Pass-Through Logic ---
 request_data = body.model_dump(exclude_none=True)
 if isinstance(request_data.get("input"), str):
 request_data["input"] = [request_data["input"]]
-
 response = await client.aembedding(request=request, **request_data)
 
 return response
@@ -894,7 +1029,11 @@ async def embeddings(
 except HTTPException as e:
 # Re-raise HTTPException to ensure it's not caught by the generic Exception handler
 raise e
- except (litellm.InvalidRequestError, ValueError, litellm.ContextWindowExceededError) as e:
 raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
 except litellm.AuthenticationError as e:
 raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
@@ -910,10 +1049,12 @@ async def embeddings(
 logging.error(f"Embedding request failed: {e}")
 raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/")
 def read_root():
 return {"Status": "API Key Proxy is running"}
 
 @app.get("/v1/models")
 async def list_models(
 request: Request,
@@ -923,22 +1064,30 @@ async def list_models(
 ):
 """
 Returns a list of available models in the OpenAI-compatible format.
-
 Query Parameters:
 enriched: If True (default), returns detailed model info with pricing and capabilities.
 If False, returns minimal OpenAI-compatible response.
 """
 model_ids = await client.get_all_available_models(grouped=False)
-
- if enriched and hasattr(request.app.state, 'model_info_service'):
 model_info_service = request.app.state.model_info_service
 if model_info_service.is_ready:
 # Return enriched model data
 enriched_data = model_info_service.enrich_model_list(model_ids)
 return {"object": "list", "data": enriched_data}
-
 # Fallback to basic model cards
- model_cards = [{"id": model_id, "object": "model", "created": int(time.time()), "owned_by": "Mirro-Proxy"} for model_id in model_ids]
 return {"object": "list", "data": model_cards}
 
@@ -950,17 +1099,17 @@ async def get_model(
 ):
 """
 Returns detailed information about a specific model.
-
 Path Parameters:
 model_id: The model ID (e.g., "anthropic/claude-3-opus", "openrouter/openai/gpt-4")
 """
- if hasattr(request.app.state, 'model_info_service'):
 model_info_service = request.app.state.model_info_service
 if model_info_service.is_ready:
 info = model_info_service.get_model_info(model_id)
 if info:
 return info.to_dict()
-
 # Return basic info if service not ready or model not found
 return {
 "id": model_id,
@@ -978,7 +1127,7 @@ async def model_info_stats(
 """
 Returns statistics about the model info service (for monitoring/debugging).
 """
- if hasattr(request.app.state, 'model_info_service'):
 return request.app.state.model_info_service.get_stats()
 return {"error": "Model info service not initialized"}
 
@@ -990,11 +1139,12 @@ async def list_providers(_=Depends(verify_api_key)):
 """
 return list(PROVIDER_PLUGINS.keys())
 
 @app.post("/v1/token-count")
 async def token_count(
- request: Request,
 client: RotatingClient = Depends(get_rotating_client),
- _=Depends(verify_api_key)
 ):
 """
 Calculates the token count for a given list of messages and a model.
@@ -1005,7 +1155,9 @@ async def token_count(
 messages = data.get("messages")
 
 if not model or not messages:
- raise HTTPException(status_code=400, detail="'model' and 'messages' are required.")
 
 count = client.token_count(**data)
 return {"token_count": count}
@@ -1016,13 +1168,10 @@ async def token_count(
 
 
 @app.post("/v1/cost-estimate")
- async def cost_estimate(
- request: Request,
- _=Depends(verify_api_key)
- ):
 """
 Estimates the cost for a request based on token counts and model pricing.
-
 Request body:
 {
 "model": "anthropic/claude-3-opus",
@@ -1031,7 +1180,7 @@ async def cost_estimate(
 "cache_read_tokens": 0, # optional
 "cache_creation_tokens": 0 # optional
 }
-
 Returns:
 {
 "model": "anthropic/claude-3-opus",
@@ -1051,25 +1200,28 @@ async def cost_estimate(
 completion_tokens = data.get("completion_tokens", 0)
 cache_read_tokens = data.get("cache_read_tokens", 0)
 cache_creation_tokens = data.get("cache_creation_tokens", 0)
-
 if not model:
 raise HTTPException(status_code=400, detail="'model' is required.")
-
 result = {
 "model": model,
 "cost": None,
 "currency": "USD",
 "pricing": {},
- "source": None
 }
-
 # Try model info service first
- if hasattr(request.app.state, 'model_info_service'):
 model_info_service = request.app.state.model_info_service
 if model_info_service.is_ready:
 cost = model_info_service.calculate_cost(
- model, prompt_tokens, completion_tokens,
- cache_read_tokens, cache_creation_tokens
 )
 if cost is not None:
 cost_info = model_info_service.get_cost_info(model)
@@ -1077,31 +1229,32 @@ async def cost_estimate(
 result["pricing"] = cost_info or {}
 result["source"] = "model_info_service"
 return result
-
 # Fallback to litellm
 try:
 import litellm
 
 # Create a mock response for cost calculation
 model_info = litellm.get_model_info(model)
 input_cost = model_info.get("input_cost_per_token", 0)
 output_cost = model_info.get("output_cost_per_token", 0)
-
 if input_cost or output_cost:
 cost = (prompt_tokens * input_cost) + (completion_tokens * output_cost)
 result["cost"] = cost
 result["pricing"] = {
 "input_cost_per_token": input_cost,
- "output_cost_per_token": output_cost
 }
 result["source"] = "litellm_fallback"
 return result
 except Exception:
 pass
-
 result["source"] = "unknown"
 result["error"] = "Pricing data not available for this model"
 return result
-
 except HTTPException:
 raise
 except Exception as e:
@@ -1112,17 +1265,18 @@ async def cost_estimate(
 if __name__ == "__main__":
 # Define ENV_FILE for onboarding checks
 ENV_FILE = Path.cwd() / ".env"
-
 # Check if launcher TUI should be shown (no arguments provided)
 if len(sys.argv) == 1:
 # No arguments - show launcher TUI (lazy import)
 from proxy_app.launcher_tui import run_launcher_tui
 
 run_launcher_tui()
 # Launcher modifies sys.argv and returns, or exits if user chose Exit
 # If we get here, user chose "Run Proxy" and sys.argv is modified
 # Re-parse arguments with modified sys.argv
 args = parser.parse_args()
-
 def needs_onboarding() -> bool:
 """
 Check if the proxy needs onboarding (first-time setup).
@@ -1132,40 +1286,49 @@ if __name__ == "__main__":
 # PROXY_API_KEY is optional (will show warning if not set)
 if not ENV_FILE.is_file():
 return True
-
 return False
 
 def show_onboarding_message():
 """Display clear explanatory message for why onboarding is needed."""
- os.system('cls' if os.name == 'nt' else 'clear') # Clear terminal for clean presentation
- console.print(Panel.fit(
- "[bold cyan]🚀 LLM API Key Proxy - First Time Setup[/bold cyan]",
- border_style="cyan"
- ))
 console.print("[bold yellow]⚠️ Configuration Required[/bold yellow]\n")
-
 console.print("The proxy needs initial configuration:")
 console.print(" [red]❌ No .env file found[/red]")
-
 console.print("\n[bold]Why this matters:[/bold]")
 console.print(" • The .env file stores your credentials and settings")
 console.print(" • PROXY_API_KEY protects your proxy from unauthorized access")
 console.print(" • Provider API keys enable LLM access")
-
 console.print("\n[bold]What happens next:[/bold]")
 console.print(" 1. We'll create a .env file with PROXY_API_KEY")
 console.print(" 2. You can add LLM provider credentials (API keys or OAuth)")
 console.print(" 3. The proxy will then start normally")
-
- console.print("\n[bold yellow]⚠️ Note:[/bold yellow] The credential tool adds PROXY_API_KEY by default.")
 console.print(" You can remove it later if you want an unsecured proxy.\n")
-
- console.input("[bold green]Press Enter to launch the credential setup tool...[/bold green]")
 
 # Check if user explicitly wants to add credentials
 if args.add_credential:
 # Import and call ensure_env_defaults to create .env and PROXY_API_KEY if needed
 from rotator_library.credential_tool import ensure_env_defaults
 
 ensure_env_defaults()
 # Reload environment variables after ensure_env_defaults creates/updates .env
 load_dotenv(override=True)
@@ -1176,36 +1339,35 @@ if __name__ == "__main__":
 # Import console from rich for better messaging
 from rich.console import Console
 from rich.panel import Panel
 
 console = Console()
-
 # Show clear explanatory message
 show_onboarding_message()
-
 # Launch credential tool automatically
 from rotator_library.credential_tool import ensure_env_defaults
 
 ensure_env_defaults()
 load_dotenv(override=True)
 run_credential_tool()
-
 # After credential tool exits, reload and re-check
 load_dotenv(override=True)
 # Re-read PROXY_API_KEY from environment
 PROXY_API_KEY = os.getenv("PROXY_API_KEY")
-
 # Verify onboarding is complete
 if needs_onboarding():
 console.print("\n[bold red]❌ Configuration incomplete.[/bold red]")
- console.print("The proxy still cannot start. Please ensure PROXY_API_KEY is set in .env\n")
 sys.exit(1)
 else:
 console.print("\n[bold green]✅ Configuration complete![/bold green]")
 console.print("\nStarting proxy server...\n")
-
- # Validate PROXY_API_KEY before starting the server
- if not PROXY_API_KEY:
- raise ValueError("PROXY_API_KEY environment variable not set. Please run with --add-credential to set up your environment.")
-
- import uvicorn
- uvicorn.run(app, host=args.host, port=args.port)
 
 
 # --- Argument Parsing (BEFORE heavy imports) ---
 parser = argparse.ArgumentParser(description="API Key Proxy Server")
+ parser.add_argument(
+ "--host", type=str, default="0.0.0.0", help="Host to bind the server to."
+ )
 parser.add_argument("--port", type=int, default=8000, help="Port to run the server on.")
+ parser.add_argument(
+ "--enable-request-logging", action="store_true", help="Enable request logging."
+ )
+ parser.add_argument(
+ "--add-credential",
+ action="store_true",
+ help="Launch the interactive tool to add a new OAuth credential.",
+ )
 args, _ = parser.parse_known_args()
 
 # Add the 'src' directory to the Python path
 
 if len(sys.argv) == 1:
 # TUI MODE - Load ONLY what's needed for the launcher (fast path!)
 from proxy_app.launcher_tui import run_launcher_tui
+
 run_launcher_tui()
 # Launcher modifies sys.argv and returns, or exits if user chose Exit
 # If we get here, user chose "Run Proxy" and sys.argv is modified
 
 # Check if credential tool mode (also doesn't need heavy proxy imports)
 if args.add_credential:
 from rotator_library.credential_tool import run_credential_tool
+
 run_credential_tool()
 sys.exit(0)
 
 # Phase 2: Load Rich for loading spinner (lightweight)
 from rich.console import Console
+
 _console = Console()
 
 # Phase 3: Heavy dependencies with granular loading messages
 
 import json
 from typing import AsyncGenerator, Any, List, Optional, Union
 from pydantic import BaseModel, Field
+
 # --- Early Log Level Configuration ---
 logging.getLogger("LiteLLM").setLevel(logging.WARNING)
 
 with _console.status("[dim]Loading LiteLLM library...", spinner="dots"):
 import litellm
 
+ # Phase 4: Application imports with granular loading messages
 print(" → Initializing proxy core...")
 with _console.status("[dim]Initializing proxy core...", spinner="dots"):
 from rotator_library import RotatingClient
 
 # Provider lazy loading happens during import, so time it here
 _provider_start = time.time()
 with _console.status("[dim]Discovering provider plugins...", spinner="dots"):
+ from rotator_library import (
+ PROVIDER_PLUGINS,
+ ) # This triggers lazy load via __getattr__
 _provider_time = time.time() - _provider_start
 
 # Get count after import (without timing to avoid double-counting)
 _plugin_count = len(PROVIDER_PLUGINS)
 
+
 # --- Pydantic Models ---
 class EmbeddingRequest(BaseModel):
 model: str
 
 dimensions: Optional[int] = None
 user: Optional[str] = None
 
+
 class ModelCard(BaseModel):
 """Basic model card for minimal response."""
+
 id: str
 object: str = "model"
 created: int = Field(default_factory=lambda: int(time.time()))
 owned_by: str = "Mirro-Proxy"
 
+
 class ModelCapabilities(BaseModel):
 """Model capability flags."""
+
 tool_choice: bool = False
 function_calling: bool = False
 reasoning: bool = False
 
 prompt_caching: bool = False
 assistant_prefill: bool = False
 
+
 class EnrichedModelCard(BaseModel):
 """Extended model card with pricing and capabilities."""
+
 id: str
 object: str = "model"
 created: int = Field(default_factory=lambda: int(time.time()))
 
 # Debug info (optional)
 _sources: Optional[List[str]] = None
 _match_type: Optional[str] = None
+
 class Config:
 extra = "allow" # Allow extra fields from the service
 
+
 class ModelList(BaseModel):
 """List of models response."""
+
 object: str = "list"
 data: List[ModelCard]
 
+
 class EnrichedModelList(BaseModel):
 """List of enriched models with pricing and capabilities."""
+
 object: str = "list"
 data: List[EnrichedModelCard]
 
+
 # Calculate total loading time
 _elapsed = time.time() - _start_time
+ print(
+ f"✓ Server ready in {_elapsed:.2f}s ({_plugin_count} providers discovered in {_provider_time:.2f}s)"
+ )
 
 # Clear screen and reprint header for clean startup view
 # This pushes loading messages up (still in scroll history) but shows a clean final screen
 import os as _os_module
+
+ _os_module.system("cls" if _os_module.name == "nt" else "clear")
 
 # Reprint header
 print("━" * 70)
 
 print(f"Proxy API Key: {key_display}")
 print(f"GitHub: https://github.com/Mirrowel/LLM-API-Key-Proxy")
 print("━" * 70)
+ print(
+ f"✓ Server ready in {_elapsed:.2f}s ({_plugin_count} providers discovered in {_provider_time:.2f}s)"
+ )
 
 # Note: Debug logging will be added after logging configuration below
 
 console_handler = colorlog.StreamHandler(sys.stdout)
 console_handler.setLevel(logging.INFO)
 formatter = colorlog.ColoredFormatter(
+ "%(log_color)s%(message)s",
 log_colors={
+ "DEBUG": "cyan",
+ "INFO": "green",
+ "WARNING": "yellow",
+ "ERROR": "red",
+ "CRITICAL": "red,bg_white",
+ },
 )
 console_handler.setFormatter(formatter)
 
 # Configure a file handler for INFO-level logs and higher
 info_file_handler = logging.FileHandler(LOG_DIR / "proxy.log", encoding="utf-8")
 info_file_handler.setLevel(logging.INFO)
+ info_file_handler.setFormatter(
+ logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+ )
 
 # Configure a dedicated file handler for all DEBUG-level logs
 debug_file_handler = logging.FileHandler(LOG_DIR / "proxy_debug.log", encoding="utf-8")
 debug_file_handler.setLevel(logging.DEBUG)
+ debug_file_handler.setFormatter(
+ logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+ )
+
 
 # Create a filter to ensure the debug handler ONLY gets DEBUG messages from the rotator_library
 class RotatorDebugFilter(logging.Filter):
 def filter(self, record):
+ return record.levelno == logging.DEBUG and record.name.startswith(
+ "rotator_library"
+ )
+
+
 debug_file_handler.addFilter(RotatorDebugFilter())
 
 # Configure a console handler with color
 console_handler = colorlog.StreamHandler(sys.stdout)
 console_handler.setLevel(logging.INFO)
 formatter = colorlog.ColoredFormatter(
+ "%(log_color)s%(message)s",
 log_colors={
+ "DEBUG": "cyan",
+ "INFO": "green",
+ "WARNING": "yellow",
+ "ERROR": "red",
+ "CRITICAL": "red,bg_white",
+ },
 )
 console_handler.setFormatter(formatter)
 
+
 # Add a filter to prevent any LiteLLM logs from cluttering the console
 class NoLiteLLMLogFilter(logging.Filter):
 def filter(self, record):
+ return not record.name.startswith("LiteLLM")
+
+
 console_handler.addFilter(NoLiteLLMLogFilter())
 
 # Get the root logger and set it to DEBUG to capture all messages
 
  for key, value in os.environ.items():
  if key.startswith("IGNORE_MODELS_"):
  provider = key.replace("IGNORE_MODELS_", "").lower()
+ models_to_ignore = [
+ model.strip() for model in value.split(",") if model.strip()
+ ]
  ignore_models[provider] = models_to_ignore
+ logging.debug(
+ f"Loaded ignore list for provider '{provider}': {models_to_ignore}"
+ )
 
  # Load model whitelist from environment variables
  whitelist_models = {}
  for key, value in os.environ.items():
  if key.startswith("WHITELIST_MODELS_"):
  provider = key.replace("WHITELIST_MODELS_", "").lower()
+ models_to_whitelist = [
+ model.strip() for model in value.split(",") if model.strip()
+ ]
  whitelist_models[provider] = models_to_whitelist
+ logging.debug(
+ f"Loaded whitelist for provider '{provider}': {models_to_whitelist}"
+ )
 
  # Load max concurrent requests per key from environment variables
  max_concurrent_requests_per_key = {}
 
  try:
  max_concurrent = int(value)
  if max_concurrent < 1:
+ logging.warning(
+ f"Invalid max_concurrent value for provider '{provider}': {value}. Must be >= 1. Using default (1)."
+ )
  max_concurrent = 1
  max_concurrent_requests_per_key[provider] = max_concurrent
+ logging.debug(
+ f"Loaded max concurrent requests for provider '{provider}': {max_concurrent}"
+ )
  except ValueError:
+ logging.warning(
+ f"Invalid max_concurrent value for provider '{provider}': {value}. Using default (1)."
+ )
+
 
  # --- Lifespan Management ---
  @asynccontextmanager
 
  if not skip_oauth_init and oauth_credentials:
  logging.info("Starting OAuth credential validation and deduplication...")
  processed_emails = {} # email -> {provider: path}
+ credentials_to_initialize = {} # provider -> [paths]
  final_oauth_credentials = {}
 
  # --- Pass 1: Pre-initialization Scan & Deduplication ---
+ # logging.info("Pass 1: Scanning for existing metadata to find duplicates...")
  for provider, paths in oauth_credentials.items():
  if provider not in credentials_to_initialize:
  credentials_to_initialize[provider] = []
 
  if path.startswith("env://"):
  credentials_to_initialize[provider].append(path)
  continue
+
  try:
+ with open(path, "r") as f:
  data = json.load(f)
  metadata = data.get("_proxy_metadata", {})
  email = metadata.get("email")
 
  if email:
  if email not in processed_emails:
  processed_emails[email] = {}
+
  if provider in processed_emails[email]:
  original_path = processed_emails[email][provider]
+ logging.warning(
+ f"Duplicate for '{email}' on '{provider}' found in pre-scan: '{Path(path).name}'. Original: '{Path(original_path).name}'. Skipping."
+ )
  continue
  else:
  processed_emails[email][provider] = path
+
  credentials_to_initialize[provider].append(path)
 
  except (FileNotFoundError, json.JSONDecodeError) as e:
+ logging.warning(
+ f"Could not pre-read metadata from '{path}': {e}. Will process during initialization."
+ )
  credentials_to_initialize[provider].append(path)
+
  # --- Pass 2: Parallel Initialization of Filtered Credentials ---
+ # logging.info("Pass 2: Initializing unique credentials and performing final check...")
  async def process_credential(provider: str, path: str, provider_instance):
  """Process a single credential: initialize and fetch user info."""
  try:
  await provider_instance.initialize_token(path)
 
+ if not hasattr(provider_instance, "get_user_info"):
  return (provider, path, None, None)
 
  user_info = await provider_instance.get_user_info(path)
 
  return (provider, path, email, None)
 
  except Exception as e:
+ logging.error(
+ f"Failed to process OAuth token for {provider} at '{path}': {e}"
+ )
  return (provider, path, None, e)
 
  # Collect all tasks for parallel execution
 
  provider_plugin_class = PROVIDER_PLUGINS.get(provider)
  if not provider_plugin_class:
  continue
+
  provider_instance = provider_plugin_class()
+
  for path in paths:
  tasks.append(process_credential(provider, path, provider_instance))
 
 
  continue
 
  provider, path, email, error = result
+
  # Skip if there was an error
  if error:
  continue
 
 
  # Handle empty email
  if not email:
+ logging.warning(
+ f"Could not retrieve email for '{path}'. Treating as unique."
+ )
  if provider not in final_oauth_credentials:
  final_oauth_credentials[provider] = []
  final_oauth_credentials[provider].append(path)
 
  # Deduplication check
  if email not in processed_emails:
  processed_emails[email] = {}
+
+ if (
+ provider in processed_emails[email]
+ and processed_emails[email][provider] != path
+ ):
  original_path = processed_emails[email][provider]
+ logging.warning(
+ f"Duplicate for '{email}' on '{provider}' found post-init: '{Path(path).name}'. Original: '{Path(original_path).name}'. Skipping."
+ )
  continue
  else:
  processed_emails[email][provider] = path
 
  # Update metadata (skip for env-based credentials - they don't have files)
  if not path.startswith("env://"):
  try:
+ with open(path, "r+") as f:
  data = json.load(f)
  metadata = data.get("_proxy_metadata", {})
  metadata["email"] = email
 
  # The client now uses the root logger configuration
  client = RotatingClient(
  api_keys=api_keys,
+ oauth_credentials=oauth_credentials, # Pass OAuth config
  configure_logging=True,
  litellm_provider_params=litellm_provider_params,
  ignore_models=ignore_models,
  whitelist_models=whitelist_models,
  enable_request_logging=ENABLE_REQUEST_LOGGING,
+ max_concurrent_requests_per_key=max_concurrent_requests_per_key,
  )
+
  # Log loaded credentials summary (compact, always visible for deployment verification)
  #_api_summary = ', '.join([f"{p}:{len(c)}" for p, c in api_keys.items()]) if api_keys else "none"
  #_oauth_summary = ', '.join([f"{p}:{len(c)}" for p, c in oauth_credentials.items()]) if oauth_credentials else "none"
 
  #print(f"🔑 Credentials loaded: {_total_summary} (API: {_api_summary} | OAuth: {_oauth_summary})")
  client.background_refresher.start() # Start the background task
  app.state.rotating_client = client
+
  # Warn if no provider credentials are configured
  if not client.all_credentials:
  logging.warning("=" * 70)
  logging.warning("⚠️ NO PROVIDER CREDENTIALS CONFIGURED")
  logging.warning("The proxy is running but cannot serve any LLM requests.")
+ logging.warning(
+ "Launch the credential tool to add API keys or OAuth credentials."
+ )
  logging.warning(" • Executable: Run with --add-credential flag")
  logging.warning(" • Source: python src/proxy_app/main.py --add-credential")
  logging.warning("=" * 70)
+
  os.environ["LITELLM_LOG"] = "ERROR"
  litellm.set_verbose = False
  litellm.drop_params = True
 
  else:
  app.state.embedding_batcher = None
  logging.info("RotatingClient initialized (EmbeddingBatcher disabled).")
+
  # Start model info service in background (fetches pricing/capabilities data)
  # This runs asynchronously and doesn't block proxy startup
  model_info_service = await init_model_info_service()
  app.state.model_info_service = model_info_service
  logging.info("Model info service started (fetching pricing data in background).")
+
  yield
+
+ await client.background_refresher.stop() # Stop the background task on shutdown
  if app.state.embedding_batcher:
  await app.state.embedding_batcher.stop()
  await client.close()
+
  # Stop model info service
+ if hasattr(app.state, "model_info_service") and app.state.model_info_service:
  await app.state.model_info_service.stop()
+
  if app.state.embedding_batcher:
  logging.info("RotatingClient and EmbeddingBatcher closed.")
  else:
  logging.info("RotatingClient closed.")
 
+
  # --- FastAPI App Setup ---
  app = FastAPI(lifespan=lifespan)
 
  )
  api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
 
+
  def get_rotating_client(request: Request) -> RotatingClient:
  """Dependency to get the rotating client instance from the app state."""
  return request.app.state.rotating_client
 
+
  def get_embedding_batcher(request: Request) -> EmbeddingBatcher:
  """Dependency to get the embedding batcher instance from the app state."""
  return request.app.state.embedding_batcher
 
+
  async def verify_api_key(auth: str = Depends(api_key_header)):
  """Dependency to verify the proxy API key."""
+ # If PROXY_API_KEY is not set or empty, skip verification (open access)
+ if not PROXY_API_KEY:
+ return auth
  if not auth or auth != f"Bearer {PROXY_API_KEY}":
  raise HTTPException(status_code=401, detail="Invalid or missing API Key")
  return auth
 
+
  async def streaming_response_wrapper(
  request: Request,
  request_data: dict,
  response_stream: AsyncGenerator[str, None],
+ logger: Optional[DetailedLogger] = None,
  ) -> AsyncGenerator[str, None]:
  """
  Wraps a streaming response to log the full response after completion
 
  """
  response_chunks = []
  full_response = {}
+
  try:
  async for chunk_str in response_stream:
  if await request.is_disconnected():
 
  break
  yield chunk_str
  if chunk_str.strip() and chunk_str.startswith("data:"):
+ content = chunk_str[len("data:") :].strip()
  if content != "[DONE]":
  try:
  chunk_data = json.loads(content)
 
  "error": {
  "message": f"An unexpected error occurred during the stream: {str(e)}",
  "type": "proxy_internal_error",
+ "code": 500,
  }
  }
  yield f"data: {json.dumps(error_payload)}\n\n"
  yield "data: [DONE]\n\n"
  # Also log this as a failed request
  if logger:
+ logger.log_final_response(
+ status_code=500, headers=None, body={"error": str(e)}
+ )
+ return # Stop further processing
  finally:
  if response_chunks:
  # --- Aggregation Logic ---
 
  final_message["content"] = ""
  if value:
  final_message["content"] += value
+
  elif key == "tool_calls":
  for tc_chunk in value:
  index = tc_chunk["index"]
  if index not in aggregated_tool_calls:
+ aggregated_tool_calls[index] = {
+ "type": "function",
+ "function": {"name": "", "arguments": ""},
+ }
  # Ensure 'function' key exists for this index before accessing its sub-keys
  if "function" not in aggregated_tool_calls[index]:
+ aggregated_tool_calls[index]["function"] = {
+ "name": "",
+ "arguments": "",
+ }
  if tc_chunk.get("id"):
  aggregated_tool_calls[index]["id"] = tc_chunk["id"]
  if "function" in tc_chunk:
  if "name" in tc_chunk["function"]:
  if tc_chunk["function"]["name"] is not None:
+ aggregated_tool_calls[index]["function"][
+ "name"
+ ] += tc_chunk["function"]["name"]
  if "arguments" in tc_chunk["function"]:
+ if (
+ tc_chunk["function"]["arguments"]
+ is not None
+ ):
+ aggregated_tool_calls[index]["function"][
+ "arguments"
+ ] += tc_chunk["function"]["arguments"]
+
  elif key == "function_call":
  if "function_call" not in final_message:
+ final_message["function_call"] = {
+ "name": "",
+ "arguments": "",
+ }
  if "name" in value:
  if value["name"] is not None:
+ final_message["function_call"]["name"] += value[
+ "name"
+ ]
  if "arguments" in value:
  if value["arguments"] is not None:
+ final_message["function_call"]["arguments"] += (
+ value["arguments"]
+ )
+
+ else: # Generic key handling for other data like 'reasoning'
  # FIX: Role should always replace, never concatenate
  if key == "role":
  final_message[key] = value
 
  final_choice = {
  "index": 0,
  "message": final_message,
+ "finish_reason": finish_reason,
  }
 
  full_response = {
 
  "created": first_chunk.get("created"),
  "model": first_chunk.get("model"),
  "choices": [final_choice],
+ "usage": usage_data,
  }
 
  if logger:
  logger.log_final_response(
  status_code=200,
  headers=None, # Headers are not available at this stage
+ body=full_response,
  )
 
+
  @app.post("/v1/chat/completions")
  async def chat_completions(
  request: Request,
  client: RotatingClient = Depends(get_rotating_client),
+ _=Depends(verify_api_key),
  ):
  """
  OpenAI-compatible endpoint powered by the RotatingClient.
 
  # instead of actual schemas, which can cause tool hallucination
  # Modes: "remove" = delete temperature key, "set" = change to 1.0, "false" = disabled
  override_temp_zero = os.getenv("OVERRIDE_TEMPERATURE_ZERO", "false").lower()
+
+ if (
+ override_temp_zero in ("remove", "set", "true", "1", "yes")
+ and "temperature" in request_data
+ and request_data["temperature"] == 0
+ ):
  if override_temp_zero == "remove":
  # Remove temperature key entirely
  del request_data["temperature"]
+ logging.debug(
+ "OVERRIDE_TEMPERATURE_ZERO=remove: Removed temperature=0 from request"
+ )
  else:
  # Set to 1.0 (for "set", "true", "1", "yes")
  request_data["temperature"] = 1.0
+ logging.debug(
+ "OVERRIDE_TEMPERATURE_ZERO=set: Converting temperature=0 to temperature=1.0"
+ )
 
  # If logging is enabled, perform all logging operations using the parsed data.
  if logger:
 
 
  # Extract and log specific reasoning parameters for monitoring.
  model = request_data.get("model")
+ generation_cfg = (
+ request_data.get("generationConfig", {})
+ or request_data.get("generation_config", {})
+ or {}
+ )
+ reasoning_effort = request_data.get("reasoning_effort") or generation_cfg.get(
+ "reasoning_effort"
+ )
+ custom_reasoning_budget = request_data.get(
+ "custom_reasoning_budget"
+ ) or generation_cfg.get("custom_reasoning_budget", False)
 
  logging.getLogger("rotator_library").debug(
  f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
 
  url=str(request.url),
  headers=dict(request.headers),
  client_info=(request.client.host, request.client.port),
+ request_data=request_data,
  )
  is_streaming = request_data.get("stream", False)
 
  if is_streaming:
  response_generator = client.acompletion(request=request, **request_data)
  return StreamingResponse(
+ streaming_response_wrapper(
+ request, request_data, response_generator, logger
+ ),
+ media_type="text/event-stream",
  )
  else:
  response = await client.acompletion(request=request, **request_data)
  if logger:
  # Assuming response has status_code and headers attributes
  # This might need adjustment based on the actual response object
+ response_headers = (
+ response.headers if hasattr(response, "headers") else None
+ )
+ status_code = (
+ response.status_code if hasattr(response, "status_code") else 200
+ )
  logger.log_final_response(
  status_code=status_code,
  headers=response_headers,
+ body=response.model_dump(),
  )
  return response
 
+ except (
+ litellm.InvalidRequestError,
+ ValueError,
+ litellm.ContextWindowExceededError,
+ ) as e:
  raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
  except litellm.AuthenticationError as e:
  raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
 
  except json.JSONDecodeError:
954
  request_data = {"error": "Could not parse request body"}
955
  if logger:
956
+ logger.log_final_response(
957
+ status_code=500, headers=None, body={"error": str(e)}
958
+ )
959
  raise HTTPException(status_code=500, detail=str(e))
960
 
961
+
962
  @app.post("/v1/embeddings")
963
  async def embeddings(
964
  request: Request,
965
  body: EmbeddingRequest,
966
  client: RotatingClient = Depends(get_rotating_client),
967
  batcher: Optional[EmbeddingBatcher] = Depends(get_embedding_batcher),
968
+ _=Depends(verify_api_key),
969
  ):
970
  """
971
  OpenAI-compatible endpoint for creating embeddings.
 
979
  url=str(request.url),
980
  headers=dict(request.headers),
981
  client_info=(request.client.host, request.client.port),
982
+ request_data=request_data,
983
  )
984
  if USE_EMBEDDING_BATCHER and batcher:
985
  # --- Server-Side Batching Logic ---
 
993
  individual_request = request_data.copy()
994
  individual_request["input"] = single_input
995
  tasks.append(batcher.add_request(individual_request))
996
+
997
  results = await asyncio.gather(*tasks)
998
 
999
  all_data = []
 
1009
  "object": "list",
1010
  "model": results[0]["model"],
1011
  "data": all_data,
1012
+ "usage": {
1013
+ "prompt_tokens": total_prompt_tokens,
1014
+ "total_tokens": total_tokens,
1015
+ },
1016
  }
1017
  response = litellm.EmbeddingResponse(**final_response_data)
1018
+
1019
  else:
1020
  # --- Direct Pass-Through Logic ---
1021
  request_data = body.model_dump(exclude_none=True)
1022
  if isinstance(request_data.get("input"), str):
1023
  request_data["input"] = [request_data["input"]]
1024
+
1025
  response = await client.aembedding(request=request, **request_data)
1026
 
1027
  return response
 
1029
  except HTTPException as e:
1030
  # Re-raise HTTPException to ensure it's not caught by the generic Exception handler
1031
  raise e
1032
+ except (
1033
+ litellm.InvalidRequestError,
1034
+ ValueError,
1035
+ litellm.ContextWindowExceededError,
1036
+ ) as e:
1037
  raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
1038
  except litellm.AuthenticationError as e:
1039
  raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
 
1049
  logging.error(f"Embedding request failed: {e}")
1050
  raise HTTPException(status_code=500, detail=str(e))
1051
 
1052
+
1053
  @app.get("/")
1054
  def read_root():
1055
  return {"Status": "API Key Proxy is running"}
1056
 
1057
+
1058
  @app.get("/v1/models")
1059
  async def list_models(
1060
  request: Request,
 
1064
  ):
1065
  """
1066
  Returns a list of available models in the OpenAI-compatible format.
1067
+
1068
  Query Parameters:
1069
  enriched: If True (default), returns detailed model info with pricing and capabilities.
1070
  If False, returns minimal OpenAI-compatible response.
1071
  """
1072
  model_ids = await client.get_all_available_models(grouped=False)
1073
+
1074
+ if enriched and hasattr(request.app.state, "model_info_service"):
1075
  model_info_service = request.app.state.model_info_service
1076
  if model_info_service.is_ready:
1077
  # Return enriched model data
1078
  enriched_data = model_info_service.enrich_model_list(model_ids)
1079
  return {"object": "list", "data": enriched_data}
1080
+
1081
  # Fallback to basic model cards
1082
+ model_cards = [
1083
+ {
1084
+ "id": model_id,
1085
+ "object": "model",
1086
+ "created": int(time.time()),
1087
+ "owned_by": "Mirro-Proxy",
1088
+ }
1089
+ for model_id in model_ids
1090
+ ]
1091
  return {"object": "list", "data": model_cards}
1092
 
1093
 
 
  ):
  """
  Returns detailed information about a specific model.
+
  Path Parameters:
  model_id: The model ID (e.g., "anthropic/claude-3-opus", "openrouter/openai/gpt-4")
  """
+ if hasattr(request.app.state, "model_info_service"):
  model_info_service = request.app.state.model_info_service
  if model_info_service.is_ready:
  info = model_info_service.get_model_info(model_id)
  if info:
  return info.to_dict()
+
  # Return basic info if service not ready or model not found
  return {
  "id": model_id,
 
  """
  Returns statistics about the model info service (for monitoring/debugging).
  """
+ if hasattr(request.app.state, "model_info_service"):
  return request.app.state.model_info_service.get_stats()
  return {"error": "Model info service not initialized"}
 
 
  """
  return list(PROVIDER_PLUGINS.keys())
 
+
  @app.post("/v1/token-count")
  async def token_count(
+ request: Request,
  client: RotatingClient = Depends(get_rotating_client),
+ _=Depends(verify_api_key),
  ):
  """
  Calculates the token count for a given list of messages and a model.
 
  messages = data.get("messages")
 
  if not model or not messages:
+ raise HTTPException(
+ status_code=400, detail="'model' and 'messages' are required."
+ )
 
  count = client.token_count(**data)
  return {"token_count": count}
 
 
 
  @app.post("/v1/cost-estimate")
+ async def cost_estimate(request: Request, _=Depends(verify_api_key)):
 
  """
  Estimates the cost for a request based on token counts and model pricing.
+
  Request body:
  {
  "model": "anthropic/claude-3-opus",
 
  "cache_read_tokens": 0, # optional
  "cache_creation_tokens": 0 # optional
  }
+
  Returns:
  {
  "model": "anthropic/claude-3-opus",
 
  completion_tokens = data.get("completion_tokens", 0)
  cache_read_tokens = data.get("cache_read_tokens", 0)
  cache_creation_tokens = data.get("cache_creation_tokens", 0)
+
  if not model:
  raise HTTPException(status_code=400, detail="'model' is required.")
+
  result = {
  "model": model,
  "cost": None,
  "currency": "USD",
  "pricing": {},
+ "source": None,
  }
+
  # Try model info service first
+ if hasattr(request.app.state, "model_info_service"):
  model_info_service = request.app.state.model_info_service
  if model_info_service.is_ready:
  cost = model_info_service.calculate_cost(
+ model,
+ prompt_tokens,
+ completion_tokens,
+ cache_read_tokens,
+ cache_creation_tokens,
  )
  if cost is not None:
  cost_info = model_info_service.get_cost_info(model)
 
  result["pricing"] = cost_info or {}
  result["source"] = "model_info_service"
  return result
+
  # Fallback to litellm
  try:
  import litellm
+
  # Create a mock response for cost calculation
  model_info = litellm.get_model_info(model)
  input_cost = model_info.get("input_cost_per_token", 0)
  output_cost = model_info.get("output_cost_per_token", 0)
+
  if input_cost or output_cost:
  cost = (prompt_tokens * input_cost) + (completion_tokens * output_cost)
  result["cost"] = cost
  result["pricing"] = {
  "input_cost_per_token": input_cost,
+ "output_cost_per_token": output_cost,
  }
  result["source"] = "litellm_fallback"
  return result
  except Exception:
  pass
+
  result["source"] = "unknown"
  result["error"] = "Pricing data not available for this model"
  return result
+
  except HTTPException:
  raise
  except Exception as e:
 
  if __name__ == "__main__":
  # Define ENV_FILE for onboarding checks
  ENV_FILE = Path.cwd() / ".env"
+
  # Check if launcher TUI should be shown (no arguments provided)
  if len(sys.argv) == 1:
  # No arguments - show launcher TUI (lazy import)
  from proxy_app.launcher_tui import run_launcher_tui
+
  run_launcher_tui()
  # Launcher modifies sys.argv and returns, or exits if user chose Exit
  # If we get here, user chose "Run Proxy" and sys.argv is modified
  # Re-parse arguments with modified sys.argv
  args = parser.parse_args()
+
  def needs_onboarding() -> bool:
  """
  Check if the proxy needs onboarding (first-time setup).
 
  # PROXY_API_KEY is optional (will show warning if not set)
  if not ENV_FILE.is_file():
  return True
+
  return False
 
  def show_onboarding_message():
  """Display clear explanatory message for why onboarding is needed."""
+ os.system(
+ "cls" if os.name == "nt" else "clear"
+ ) # Clear terminal for clean presentation
+ console.print(
+ Panel.fit(
+ "[bold cyan]🚀 LLM API Key Proxy - First Time Setup[/bold cyan]",
+ border_style="cyan",
+ )
+ )
  console.print("[bold yellow]⚠️ Configuration Required[/bold yellow]\n")
+
  console.print("The proxy needs initial configuration:")
  console.print(" [red]❌ No .env file found[/red]")
+
  console.print("\n[bold]Why this matters:[/bold]")
  console.print(" • The .env file stores your credentials and settings")
  console.print(" • PROXY_API_KEY protects your proxy from unauthorized access")
  console.print(" • Provider API keys enable LLM access")
+
  console.print("\n[bold]What happens next:[/bold]")
  console.print(" 1. We'll create a .env file with PROXY_API_KEY")
  console.print(" 2. You can add LLM provider credentials (API keys or OAuth)")
  console.print(" 3. The proxy will then start normally")
+
+ console.print(
+ "\n[bold yellow]⚠️ Note:[/bold yellow] The credential tool adds PROXY_API_KEY by default."
+ )
  console.print(" You can remove it later if you want an unsecured proxy.\n")
+
+ console.input(
+ "[bold green]Press Enter to launch the credential setup tool...[/bold green]"
+ )
 
  # Check if user explicitly wants to add credentials
  if args.add_credential:
  # Import and call ensure_env_defaults to create .env and PROXY_API_KEY if needed
  from rotator_library.credential_tool import ensure_env_defaults
+
  ensure_env_defaults()
  # Reload environment variables after ensure_env_defaults creates/updates .env
  load_dotenv(override=True)
 
  # Import console from rich for better messaging
  from rich.console import Console
  from rich.panel import Panel
+
  console = Console()
+
  # Show clear explanatory message
  show_onboarding_message()
+
  # Launch credential tool automatically
  from rotator_library.credential_tool import ensure_env_defaults
+
  ensure_env_defaults()
  load_dotenv(override=True)
  run_credential_tool()
+
  # After credential tool exits, reload and re-check
  load_dotenv(override=True)
  # Re-read PROXY_API_KEY from environment
  PROXY_API_KEY = os.getenv("PROXY_API_KEY")
+
  # Verify onboarding is complete
  if needs_onboarding():
  console.print("\n[bold red]❌ Configuration incomplete.[/bold red]")
+ console.print(
+ "The proxy still cannot start. Please ensure PROXY_API_KEY is set in .env\n"
+ )
  sys.exit(1)
  else:
  console.print("\n[bold green]✅ Configuration complete![/bold green]")
  console.print("\nStarting proxy server...\n")
 
+ import uvicorn
 
+ uvicorn.run(app, host=args.host, port=args.port)
src/proxy_app/settings_tool.py CHANGED
@@ -17,37 +17,38 @@ console = Console()
17
 
18
  def clear_screen():
19
  """
20
- Cross-platform terminal clear that works robustly on both
21
  classic Windows conhost and modern terminals (Windows Terminal, Linux, Mac).
22
-
23
  Uses native OS commands instead of ANSI escape sequences:
24
  - Windows (conhost & Windows Terminal): cls
25
  - Unix-like systems (Linux, Mac): clear
26
  """
27
- os.system('cls' if os.name == 'nt' else 'clear')
28
 
29
 
30
  class AdvancedSettings:
31
  """Manages pending changes to .env"""
32
-
33
  def __init__(self):
34
  self.env_file = Path.cwd() / ".env"
35
  self.pending_changes = {} # key -> value (None means delete)
36
  self.load_current_settings()
37
-
38
  def load_current_settings(self):
39
  """Load current .env values into env vars"""
40
  from dotenv import load_dotenv
 
41
  load_dotenv(override=True)
42
-
43
  def set(self, key: str, value: str):
44
  """Stage a change"""
45
  self.pending_changes[key] = value
46
-
47
  def remove(self, key: str):
48
  """Stage a removal"""
49
  self.pending_changes[key] = None
50
-
51
  def save(self):
52
  """Write pending changes to .env"""
53
  for key, value in self.pending_changes.items():
@@ -57,14 +58,14 @@ class AdvancedSettings:
57
  else:
58
  # Set key
59
  set_key(str(self.env_file), key, value)
60
-
61
  self.pending_changes.clear()
62
  self.load_current_settings()
63
-
64
  def discard(self):
65
  """Discard pending changes"""
66
  self.pending_changes.clear()
67
-
68
  def has_pending(self) -> bool:
69
  """Check if there are pending changes"""
70
  return bool(self.pending_changes)
@@ -72,14 +73,14 @@ class AdvancedSettings:
72
 
73
  class CustomProviderManager:
74
  """Manages custom provider API bases"""
75
-
76
  def __init__(self, settings: AdvancedSettings):
77
  self.settings = settings
78
-
79
  def get_current_providers(self) -> Dict[str, str]:
80
  """Get currently configured custom providers"""
81
  from proxy_app.provider_urls import PROVIDER_URL_MAP
82
-
83
  providers = {}
84
  for key, value in os.environ.items():
85
  if key.endswith("_API_BASE"):
@@ -88,16 +89,16 @@ class CustomProviderManager:
88
  if provider not in PROVIDER_URL_MAP:
89
  providers[provider] = value
90
  return providers
91
-
92
  def add_provider(self, name: str, api_base: str):
93
  """Add PROVIDER_API_BASE"""
94
  key = f"{name.upper()}_API_BASE"
95
  self.settings.set(key, api_base)
96
-
97
  def edit_provider(self, name: str, api_base: str):
98
  """Edit PROVIDER_API_BASE"""
99
  self.add_provider(name, api_base)
100
-
101
  def remove_provider(self, name: str):
102
  """Remove PROVIDER_API_BASE"""
103
  key = f"{name.upper()}_API_BASE"
@@ -106,10 +107,10 @@ class CustomProviderManager:
106
 
107
  class ModelDefinitionManager:
108
  """Manages PROVIDER_MODELS"""
109
-
110
  def __init__(self, settings: AdvancedSettings):
111
  self.settings = settings
112
-
113
  def get_current_provider_models(self, provider: str) -> Optional[Dict]:
114
  """Get currently configured models for a provider"""
115
  key = f"{provider.upper()}_MODELS"
@@ -120,7 +121,7 @@ class ModelDefinitionManager:
120
  except (json.JSONDecodeError, ValueError):
121
  return None
122
  return None
123
-
124
  def get_all_providers_with_models(self) -> Dict[str, int]:
125
  """Get all providers with model definitions"""
126
  providers = {}
@@ -136,13 +137,13 @@ class ModelDefinitionManager:
136
  except (json.JSONDecodeError, ValueError):
137
  pass
138
  return providers
139
-
140
  def set_models(self, provider: str, models: Dict[str, Dict[str, Any]]):
141
  """Set PROVIDER_MODELS"""
142
  key = f"{provider.upper()}_MODELS"
143
  value = json.dumps(models)
144
  self.settings.set(key, value)
145
-
146
  def remove_models(self, provider: str):
147
  """Remove PROVIDER_MODELS"""
148
  key = f"{provider.upper()}_MODELS"
@@ -151,10 +152,10 @@

class ConcurrencyManager:
    """Manages MAX_CONCURRENT_REQUESTS_PER_KEY_PROVIDER"""
-
    def __init__(self, settings: AdvancedSettings):
        self.settings = settings
-
    def get_current_limits(self) -> Dict[str, int]:
        """Get currently configured concurrency limits"""
        limits = {}
@@ -166,18 +167,161 @@ class ConcurrencyManager:
            except (json.JSONDecodeError, ValueError):
                pass
        return limits
-
    def set_limit(self, provider: str, limit: int):
        """Set concurrency limit"""
        key = f"MAX_CONCURRENT_REQUESTS_PER_KEY_{provider.upper()}"
        self.settings.set(key, str(limit))
-
    def remove_limit(self, provider: str):
        """Remove concurrency limit (reset to default)"""
        key = f"MAX_CONCURRENT_REQUESTS_PER_KEY_{provider.upper()}"
        self.settings.remove(key)
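The `ConcurrencyManager` above round-trips per-provider limits through `MAX_CONCURRENT_REQUESTS_PER_KEY_<PROVIDER>` keys in the `.env` store. A minimal standalone sketch of that key convention (the helper name and the lenient skip-on-parse-failure behavior are illustrative, not the tool's actual API):

```python
PREFIX = "MAX_CONCURRENT_REQUESTS_PER_KEY_"

def read_concurrency_limits(env: dict) -> dict:
    """Collect per-provider concurrency limits from PREFIX-style keys."""
    limits = {}
    for key, value in env.items():
        if key.startswith(PREFIX):
            provider = key[len(PREFIX):].lower()
            try:
                limits[provider] = int(value)
            except ValueError:
                continue  # skip malformed values rather than failing
    return limits

limits = read_concurrency_limits({
    "MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI": "1",
    "MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI": "4",
    "UNRELATED": "x",
})
assert limits == {"gemini": 1, "openai": 4}
```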
# =============================================================================
# PROVIDER-SPECIFIC SETTINGS DEFINITIONS
# =============================================================================
@@ -294,24 +438,26 @@ PROVIDER_SETTINGS_MAP = {

class ProviderSettingsManager:
    """Manages provider-specific configuration settings"""
-
    def __init__(self, settings: AdvancedSettings):
        self.settings = settings
-
    def get_available_providers(self) -> List[str]:
        """Get list of providers with specific settings available"""
        return list(PROVIDER_SETTINGS_MAP.keys())
-
-   def get_provider_settings_definitions(self, provider: str) -> Dict[str, Dict[str, Any]]:
        """Get settings definitions for a provider"""
        return PROVIDER_SETTINGS_MAP.get(provider, {})
-
    def get_current_value(self, key: str, definition: Dict[str, Any]) -> Any:
        """Get current value of a setting from environment"""
        env_value = os.getenv(key)
        if env_value is None:
            return definition.get("default")
-
        setting_type = definition.get("type", "str")
        try:
            if setting_type == "bool":
@@ -322,7 +468,7 @@ class ProviderSettingsManager:
                return env_value
        except (ValueError, AttributeError):
            return definition.get("default")
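`get_current_value` reads a raw string from the environment and coerces it to the type declared in the setting definition, falling back to the default on any parse failure. A self-contained sketch of that conversion (the accepted boolean literals are an assumption, since the `bool` branch body is not shown in this hunk):

```python
def coerce_env_value(raw, definition):
    """Convert a raw .env string to the declared type, defaulting on failure.
    Hypothetical helper illustrating the pattern, not the tool's method."""
    if raw is None:
        return definition.get("default")
    kind = definition.get("type", "str")
    try:
        if kind == "bool":
            # Assumed truthy literals; the real accepted set isn't in this hunk.
            return raw.strip().lower() in ("1", "true", "yes", "on")
        if kind == "int":
            return int(raw)
        return raw
    except (ValueError, AttributeError):
        return definition.get("default")

assert coerce_env_value("true", {"type": "bool", "default": False}) is True
assert coerce_env_value("abc", {"type": "int", "default": 3}) == 3
```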
-
    def get_all_current_values(self, provider: str) -> Dict[str, Any]:
        """Get all current values for a provider"""
        definitions = self.get_provider_settings_definitions(provider)
@@ -330,7 +476,7 @@ class ProviderSettingsManager:
        for key, definition in definitions.items():
            values[key] = self.get_current_value(key, definition)
        return values
-
    def set_value(self, key: str, value: Any, definition: Dict[str, Any]):
        """Set a setting value, converting to string for .env storage"""
        setting_type = definition.get("type", "str")
@@ -339,11 +485,11 @@ class ProviderSettingsManager:
        else:
            str_value = str(value)
        self.settings.set(key, str_value)
-
    def reset_to_default(self, key: str):
        """Remove a setting to reset it to default"""
        self.settings.remove(key)
-
    def get_modified_settings(self, provider: str) -> Dict[str, Any]:
        """Get settings that differ from defaults"""
        definitions = self.get_provider_settings_definitions(provider)
@@ -358,80 +504,100 @@ class ProviderSettingsManager:
358
 
359
  class SettingsTool:
360
  """Main settings tool TUI"""
361
-
362
  def __init__(self):
363
  self.console = Console()
364
  self.settings = AdvancedSettings()
365
  self.provider_mgr = CustomProviderManager(self.settings)
366
  self.model_mgr = ModelDefinitionManager(self.settings)
367
  self.concurrency_mgr = ConcurrencyManager(self.settings)
 
 
368
  self.provider_settings_mgr = ProviderSettingsManager(self.settings)
369
  self.running = True
370
-
371
  def get_available_providers(self) -> List[str]:
372
  """Get list of providers that have credentials configured"""
373
  env_file = Path.cwd() / ".env"
374
  providers = set()
375
-
376
  # Scan for providers with API keys from local .env
377
  if env_file.exists():
378
  try:
379
- with open(env_file, 'r', encoding='utf-8') as f:
380
  for line in f:
381
  line = line.strip()
382
- if "_API_KEY" in line and "PROXY_API_KEY" not in line and "=" in line:
 
 
 
 
 
 
 
383
  provider = line.split("_API_KEY")[0].strip().lower()
384
  providers.add(provider)
385
  except (IOError, OSError):
386
  pass
387
-
388
  # Also check for OAuth providers from files
389
- oauth_dir = Path("oauth_credentials")
390
  if oauth_dir.exists():
391
  for file in oauth_dir.glob("*_oauth_*.json"):
392
  provider = file.name.split("_oauth_")[0]
393
  providers.add(provider)
394
-
395
  return sorted(list(providers))
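`get_available_providers` derives provider names from `FOO_API_KEY=...` lines in `.env`, explicitly skipping the proxy's own `PROXY_API_KEY`. The line-parsing rule can be sketched in isolation (the helper name is illustrative):

```python
def providers_from_env_lines(lines):
    """Extract provider names from FOO_API_KEY=... lines, skipping PROXY_API_KEY."""
    providers = set()
    for line in lines:
        line = line.strip()
        if "_API_KEY" in line and "PROXY_API_KEY" not in line and "=" in line:
            providers.add(line.split("_API_KEY")[0].strip().lower())
    return sorted(providers)

found = providers_from_env_lines([
    "GEMINI_API_KEY=abc",
    "OPENAI_API_KEY=def",
    "PROXY_API_KEY=secret",
    "# just a comment",
])
assert found == ["gemini", "openai"]
```

Note that a key like `GEMINI_API_KEY_2=...` would also match this substring test and yield `gemini`, which is consistent with how numbered credential keys are grouped by provider.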

    def run(self):
        """Main loop"""
        while self.running:
            self.show_main_menu()
-
    def show_main_menu(self):
        """Display settings categories"""
        clear_screen()
-
-       self.console.print(Panel.fit(
-           "[bold cyan]🔧 Advanced Settings Configuration[/bold cyan]",
-           border_style="cyan"
-       ))
-
        self.console.print()
        self.console.print("[bold]⚙️ Configuration Categories[/bold]")
        self.console.print()
        self.console.print(" 1. 🌐 Custom Provider API Bases")
        self.console.print(" 2. 📦 Provider Model Definitions")
        self.console.print(" 3. ⚡ Concurrency Limits")
-       self.console.print(" 4. 🔬 Provider-Specific Settings")
-       self.console.print(" 5. 💾 Save & Exit")
-       self.console.print(" 6. 🚫 Exit Without Saving")
-
        self.console.print()
        self.console.print("━" * 70)
-
        if self.settings.has_pending():
-           self.console.print("[yellow]ℹ️ Changes are pending until you select \"Save & Exit\"[/yellow]")
        else:
            self.console.print("[dim]ℹ️ No pending changes[/dim]")
-
        self.console.print()
-       self.console.print("[dim]⚠️ Model filters not supported - edit .env for IGNORE_MODELS_* / WHITELIST_MODELS_*[/dim]")
        self.console.print()
-
-       choice = Prompt.ask("Select option", choices=["1", "2", "3", "4", "5", "6"], show_choices=False)
-
        if choice == "1":
            self.manage_custom_providers()
        elif choice == "2":
@@ -439,34 +605,38 @@ class SettingsTool:
        elif choice == "3":
            self.manage_concurrency_limits()
        elif choice == "4":
-           self.manage_provider_settings()
        elif choice == "5":
-           self.save_and_exit()
        elif choice == "6":
            self.exit_without_saving()
-
    def manage_custom_providers(self):
        """Manage custom provider API bases"""
        while True:
            clear_screen()
-
            providers = self.provider_mgr.get_current_providers()
-
-           self.console.print(Panel.fit(
-               "[bold cyan]🌐 Custom Provider API Bases[/bold cyan]",
-               border_style="cyan"
-           ))
-
            self.console.print()
            self.console.print("[bold]📋 Configured Custom Providers[/bold]")
            self.console.print("━" * 70)
-
            if providers:
                for name, base in providers.items():
                    self.console.print(f" • {name:15} {base}")
            else:
                self.console.print(" [dim]No custom providers configured[/dim]")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print()
@@ -476,94 +646,116 @@ class SettingsTool:
            self.console.print(" 2. ✏️ Edit Existing Provider")
            self.console.print(" 3. 🗑️ Remove Provider")
            self.console.print(" 4. ↩️ Back to Settings Menu")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print()
-
-           choice = Prompt.ask("Select option", choices=["1", "2", "3", "4"], show_choices=False)
-
            if choice == "1":
                name = Prompt.ask("Provider name (e.g., 'opencode')").strip().lower()
                if name:
                    api_base = Prompt.ask("API Base URL").strip()
                    if api_base:
                        self.provider_mgr.add_provider(name, api_base)
-                       self.console.print(f"\n[green]✅ Custom provider '{name}' configured![/green]")
-                       self.console.print(f" To use: set {name.upper()}_API_KEY in credentials")
                input("\nPress Enter to continue...")
-
            elif choice == "2":
                if not providers:
                    self.console.print("\n[yellow]No providers to edit[/yellow]")
                    input("\nPress Enter to continue...")
                    continue
-
                # Show numbered list
                self.console.print("\n[bold]Select provider to edit:[/bold]")
                providers_list = list(providers.keys())
                for idx, prov in enumerate(providers_list, 1):
                    self.console.print(f" {idx}. {prov}")
-
-               choice_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(providers_list) + 1)])
                name = providers_list[choice_idx - 1]
                current_base = providers.get(name, "")
-
                self.console.print(f"\nCurrent API Base: {current_base}")
-               new_base = Prompt.ask("New API Base [press Enter to keep current]", default=current_base).strip()
-
                if new_base and new_base != current_base:
                    self.provider_mgr.edit_provider(name, new_base)
-                   self.console.print(f"\n[green]✅ Custom provider '{name}' updated![/green]")
                else:
                    self.console.print("\n[yellow]No changes made[/yellow]")
                input("\nPress Enter to continue...")
-
            elif choice == "3":
                if not providers:
                    self.console.print("\n[yellow]No providers to remove[/yellow]")
                    input("\nPress Enter to continue...")
                    continue
-
                # Show numbered list
                self.console.print("\n[bold]Select provider to remove:[/bold]")
                providers_list = list(providers.keys())
                for idx, prov in enumerate(providers_list, 1):
                    self.console.print(f" {idx}. {prov}")
-
-               choice_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(providers_list) + 1)])
                name = providers_list[choice_idx - 1]
-
                if Confirm.ask(f"Remove '{name}'?"):
                    self.provider_mgr.remove_provider(name)
-                   self.console.print(f"\n[green]✅ Provider '{name}' removed![/green]")
                input("\nPress Enter to continue...")
-
            elif choice == "4":
                break
-
    def manage_model_definitions(self):
        """Manage provider model definitions"""
        while True:
            clear_screen()
-
            all_providers = self.model_mgr.get_all_providers_with_models()
-
-           self.console.print(Panel.fit(
-               "[bold cyan]📦 Provider Model Definitions[/bold cyan]",
-               border_style="cyan"
-           ))
-
            self.console.print()
            self.console.print("[bold]📋 Configured Provider Models[/bold]")
            self.console.print("━" * 70)
-
            if all_providers:
                for provider, count in all_providers.items():
-                   self.console.print(f" • {provider:15} {count} model{'s' if count > 1 else ''}")
            else:
                self.console.print(" [dim]No model definitions configured[/dim]")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print()
@@ -574,13 +766,15 @@ class SettingsTool:
            self.console.print(" 3. 👁️ View Provider Models")
            self.console.print(" 4. 🗑️ Remove Provider Models")
            self.console.print(" 5. ↩️ Back to Settings Menu")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print()
-
-           choice = Prompt.ask("Select option", choices=["1", "2", "3", "4", "5"], show_choices=False)
-
            if choice == "1":
                self.add_model_definitions()
            elif choice == "2":
600
  self.console.print("\n[yellow]No providers to remove[/yellow]")
601
  input("\nPress Enter to continue...")
602
  continue
603
-
604
  # Show numbered list
605
- self.console.print("\n[bold]Select provider to remove models from:[/bold]")
 
 
606
  providers_list = list(all_providers.keys())
607
  for idx, prov in enumerate(providers_list, 1):
608
  self.console.print(f" {idx}. {prov}")
609
-
610
- choice_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(providers_list) + 1)])
 
 
 
611
  provider = providers_list[choice_idx - 1]
612
-
613
  if Confirm.ask(f"Remove all model definitions for '{provider}'?"):
614
  self.model_mgr.remove_models(provider)
615
- self.console.print(f"\n[green]✅ Model definitions removed for '{provider}'![/green]")
 
 
616
  input("\nPress Enter to continue...")
617
  elif choice == "5":
618
  break
619
-
620
  def add_model_definitions(self):
621
  """Add model definitions for a provider"""
622
  # Get available providers from credentials
623
  available_providers = self.get_available_providers()
624
-
625
  if not available_providers:
626
- self.console.print("\n[yellow]No providers with credentials found. Please add credentials first.[/yellow]")
 
 
627
  input("\nPress Enter to continue...")
628
  return
629
-
630
  # Show provider selection menu
631
  self.console.print("\n[bold]Select provider:[/bold]")
632
  for idx, prov in enumerate(available_providers, 1):
633
  self.console.print(f" {idx}. {prov}")
634
- self.console.print(f" {len(available_providers) + 1}. Enter custom provider name")
635
-
636
- choice = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(available_providers) + 2)])
637
-
 
 
 
 
 
638
  if choice == len(available_providers) + 1:
639
  provider = Prompt.ask("Provider name").strip().lower()
640
  else:
641
  provider = available_providers[choice - 1]
642
-
643
  if not provider:
644
  return
645
-
646
  self.console.print("\nHow would you like to define models?")
647
  self.console.print(" 1. Simple list (names only)")
648
  self.console.print(" 2. Advanced (names with IDs and options)")
649
-
650
  mode = Prompt.ask("Select mode", choices=["1", "2"], show_choices=False)
651
-
652
  models = {}
653
-
654
  if mode == "1":
655
  # Simple mode
656
  while True:
@@ -667,13 +875,19 @@ class SettingsTool:
                    break
                if name:
                    model_def = {}
-                   model_id = Prompt.ask(f"Model ID [press Enter to use '{name}']", default=name).strip()
                    if model_id and model_id != name:
                        model_def["id"] = model_id
-
                    # Optional: model options
-                   if Confirm.ask("Add model options (e.g., temperature limits)?", default=False):
-                       self.console.print("\nEnter options as key=value pairs (one per line, 'done' to finish):")
                        options = {}
                        while True:
                            opt = Prompt.ask("Option").strip()
@@ -690,121 +904,143 @@ class SettingsTool:
                            options[key.strip()] = value
                        if options:
                            model_def["options"] = options
-
                    models[name] = model_def
-
        if models:
            self.model_mgr.set_models(provider, models)
-           self.console.print(f"\n[green]✅ Model definitions saved for '{provider}'![/green]")
        else:
            self.console.print("\n[yellow]No models added[/yellow]")
-
        input("\nPress Enter to continue...")
-
    def edit_model_definitions(self, providers: List[str]):
        """Edit existing model definitions"""
        # Show numbered list
        self.console.print("\n[bold]Select provider to edit:[/bold]")
        for idx, prov in enumerate(providers, 1):
            self.console.print(f" {idx}. {prov}")
-
-       choice_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(providers) + 1)])
        provider = providers[choice_idx - 1]
-
        current_models = self.model_mgr.get_current_provider_models(provider)
        if not current_models:
            self.console.print(f"\n[yellow]No models found for '{provider}'[/yellow]")
            input("\nPress Enter to continue...")
            return
-
        # Convert to dict if list
        if isinstance(current_models, list):
            current_models = {m: {} for m in current_models}
-
        while True:
            clear_screen()
            self.console.print(f"[bold]Editing models for: {provider}[/bold]\n")
            self.console.print("Current models:")
            for i, (name, definition) in enumerate(current_models.items(), 1):
-               model_id = definition.get("id", name) if isinstance(definition, dict) else name
                self.console.print(f" {i}. {name} (ID: {model_id})")
-
            self.console.print("\nOptions:")
            self.console.print(" 1. Add new model")
            self.console.print(" 2. Edit existing model")
            self.console.print(" 3. Remove model")
            self.console.print(" 4. Done")
-
-           choice = Prompt.ask("\nSelect option", choices=["1", "2", "3", "4"], show_choices=False)
-
            if choice == "1":
                name = Prompt.ask("New model name").strip()
                if name and name not in current_models:
                    model_id = Prompt.ask("Model ID", default=name).strip()
                    current_models[name] = {"id": model_id} if model_id != name else {}
-
            elif choice == "2":
                # Show numbered list
                models_list = list(current_models.keys())
                self.console.print("\n[bold]Select model to edit:[/bold]")
                for idx, model_name in enumerate(models_list, 1):
                    self.console.print(f" {idx}. {model_name}")
-
-               model_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(models_list) + 1)])
                name = models_list[model_idx - 1]
-
                current_def = current_models[name]
-               current_id = current_def.get("id", name) if isinstance(current_def, dict) else name
-
                new_id = Prompt.ask("Model ID", default=current_id).strip()
                current_models[name] = {"id": new_id} if new_id != name else {}
-
            elif choice == "3":
                # Show numbered list
                models_list = list(current_models.keys())
                self.console.print("\n[bold]Select model to remove:[/bold]")
                for idx, model_name in enumerate(models_list, 1):
                    self.console.print(f" {idx}. {model_name}")
-
-               model_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(models_list) + 1)])
                name = models_list[model_idx - 1]
-
                if Confirm.ask(f"Remove '{name}'?"):
                    del current_models[name]
-
            elif choice == "4":
                break
-
        if current_models:
            self.model_mgr.set_models(provider, current_models)
            self.console.print(f"\n[green]✅ Models updated for '{provider}'![/green]")
        else:
-           self.console.print("\n[yellow]No models left - removing definition[/yellow]")
            self.model_mgr.remove_models(provider)
-
        input("\nPress Enter to continue...")
-
    def view_model_definitions(self, providers: List[str]):
        """View model definitions for a provider"""
        # Show numbered list
        self.console.print("\n[bold]Select provider to view:[/bold]")
        for idx, prov in enumerate(providers, 1):
            self.console.print(f" {idx}. {prov}")
-
-       choice_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(providers) + 1)])
        provider = providers[choice_idx - 1]
-
        models = self.model_mgr.get_current_provider_models(provider)
        if not models:
            self.console.print(f"\n[yellow]No models found for '{provider}'[/yellow]")
            input("\nPress Enter to continue...")
            return
-
        clear_screen()
        self.console.print(f"[bold]Provider: {provider}[/bold]\n")
        self.console.print("[bold]📦 Configured Models:[/bold]")
        self.console.print("━" * 50)
-
        # Handle both dict and list formats
        if isinstance(models, dict):
            for name, definition in models.items():
@@ -822,74 +1058,88 @@ class SettingsTool:
            for name in models:
                self.console.print(f" Name: {name}")
                self.console.print()
-
        input("Press Enter to return...")
-
    def manage_provider_settings(self):
        """Manage provider-specific settings (Antigravity, Gemini CLI)"""
        while True:
            clear_screen()
-
            available_providers = self.provider_settings_mgr.get_available_providers()
-
-           self.console.print(Panel.fit(
-               "[bold cyan]🔬 Provider-Specific Settings[/bold cyan]",
-               border_style="cyan"
-           ))
-
            self.console.print()
-           self.console.print("[bold]📋 Available Providers with Custom Settings[/bold]")
            self.console.print("━" * 70)
-
            for provider in available_providers:
                modified = self.provider_settings_mgr.get_modified_settings(provider)
-               status = f"[yellow]{len(modified)} modified[/yellow]" if modified else "[dim]defaults[/dim]"
                display_name = provider.replace("_", " ").title()
                self.console.print(f" • {display_name:20} {status}")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print()
            self.console.print("[bold]⚙️ Select Provider to Configure[/bold]")
            self.console.print()
-
            for idx, provider in enumerate(available_providers, 1):
                display_name = provider.replace("_", " ").title()
                self.console.print(f" {idx}. {display_name}")
-           self.console.print(f" {len(available_providers) + 1}. ↩️ Back to Settings Menu")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print()
-
            choices = [str(i) for i in range(1, len(available_providers) + 2)]
            choice = Prompt.ask("Select option", choices=choices, show_choices=False)
            choice_idx = int(choice)
-
            if choice_idx == len(available_providers) + 1:
                break
-
            provider = available_providers[choice_idx - 1]
            self._manage_single_provider_settings(provider)
-
    def _manage_single_provider_settings(self, provider: str):
        """Manage settings for a single provider"""
        while True:
            clear_screen()
-
            display_name = provider.replace("_", " ").title()
-           definitions = self.provider_settings_mgr.get_provider_settings_definitions(provider)
            current_values = self.provider_settings_mgr.get_all_current_values(provider)
-
-           self.console.print(Panel.fit(
-               f"[bold cyan]🔬 {display_name} Settings[/bold cyan]",
-               border_style="cyan"
-           ))
-
            self.console.print()
            self.console.print("[bold]📋 Current Settings[/bold]")
            self.console.print("━" * 70)
-
            # Display all settings with current values
            settings_list = list(definitions.keys())
            for idx, key in enumerate(settings_list, 1):
@@ -898,25 +1148,35 @@ class SettingsTool:
                default = definition.get("default")
                setting_type = definition.get("type", "str")
                description = definition.get("description", "")
-
                # Format value display
                if setting_type == "bool":
-                   value_display = "[green]✓ Enabled[/green]" if current else "[red]✗ Disabled[/red]"
                elif setting_type == "int":
                    value_display = f"[cyan]{current}[/cyan]"
                else:
-                   value_display = f"[cyan]{current or '(not set)'}[/cyan]" if current else "[dim](not set)[/dim]"
-
                # Check if modified from default
                modified = current != default
                mod_marker = "[yellow]*[/yellow]" if modified else " "
-
                # Short key name for display (strip provider prefix)
                short_key = key.replace(f"{provider.upper()}_", "")
-
-               self.console.print(f" {mod_marker}{idx:2}. {short_key:35} {value_display}")
                self.console.print(f" [dim]{description}[/dim]")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print("[dim]* = modified from default[/dim]")
@@ -927,13 +1187,17 @@ class SettingsTool:
            self.console.print(" R. 🔄 Reset Setting to Default")
            self.console.print(" A. 🔄 Reset All to Defaults")
            self.console.print(" B. ↩️ Back to Provider Selection")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print()
-
-           choice = Prompt.ask("Select action", choices=["e", "r", "a", "b", "E", "R", "A", "B"], show_choices=False).lower()
-
            if choice == "b":
                break
            elif choice == "e":
@@ -942,26 +1206,31 @@ class SettingsTool:
                self._reset_provider_setting(provider, settings_list, definitions)
            elif choice == "a":
                self._reset_all_provider_settings(provider, settings_list)
-
-   def _edit_provider_setting(self, provider: str, settings_list: List[str], definitions: Dict[str, Dict[str, Any]]):
        """Edit a single provider setting"""
        self.console.print("\n[bold]Select setting number to edit:[/bold]")
-
        choices = [str(i) for i in range(1, len(settings_list) + 1)]
        choice = IntPrompt.ask("Setting number", choices=choices)
        key = settings_list[choice - 1]
        definition = definitions[key]
-
        current = self.provider_settings_mgr.get_current_value(key, definition)
        default = definition.get("default")
        setting_type = definition.get("type", "str")
        short_key = key.replace(f"{provider.upper()}_", "")
-
        self.console.print(f"\n[bold]Editing: {short_key}[/bold]")
        self.console.print(f"Current value: [cyan]{current}[/cyan]")
        self.console.print(f"Default value: [dim]{default}[/dim]")
        self.console.print(f"Type: {setting_type}")
-
        if setting_type == "bool":
            new_value = Confirm.ask("\nEnable this setting?", default=current)
            self.provider_settings_mgr.set_value(key, new_value, definition)
@@ -972,71 +1241,415 @@ class SettingsTool:
            self.provider_settings_mgr.set_value(key, new_value, definition)
            self.console.print(f"\n[green]✅ {short_key} set to {new_value}![/green]")
        else:
-           new_value = Prompt.ask("\nNew value", default=str(current) if current else "").strip()
            if new_value:
                self.provider_settings_mgr.set_value(key, new_value, definition)
                self.console.print(f"\n[green]✅ {short_key} updated![/green]")
            else:
                self.console.print("\n[yellow]No changes made[/yellow]")
-
        input("\nPress Enter to continue...")
-
-   def _reset_provider_setting(self, provider: str, settings_list: List[str], definitions: Dict[str, Dict[str, Any]]):
        """Reset a single provider setting to default"""
        self.console.print("\n[bold]Select setting number to reset:[/bold]")
-
        choices = [str(i) for i in range(1, len(settings_list) + 1)]
        choice = IntPrompt.ask("Setting number", choices=choices)
        key = settings_list[choice - 1]
        definition = definitions[key]
-
        default = definition.get("default")
        short_key = key.replace(f"{provider.upper()}_", "")
-
        if Confirm.ask(f"\nReset {short_key} to default ({default})?"):
            self.provider_settings_mgr.reset_to_default(key)
            self.console.print(f"\n[green]✅ {short_key} reset to default![/green]")
        else:
            self.console.print("\n[yellow]No changes made[/yellow]")
-
        input("\nPress Enter to continue...")
-
    def _reset_all_provider_settings(self, provider: str, settings_list: List[str]):
        """Reset all provider settings to defaults"""
        display_name = provider.replace("_", " ").title()
-
-       if Confirm.ask(f"\n[bold red]Reset ALL {display_name} settings to defaults?[/bold red]"):
            for key in settings_list:
                self.provider_settings_mgr.reset_to_default(key)
-           self.console.print(f"\n[green]✅ All {display_name} settings reset to defaults![/green]")
        else:
            self.console.print("\n[yellow]No changes made[/yellow]")
-
        input("\nPress Enter to continue...")
    def manage_concurrency_limits(self):
        """Manage concurrency limits"""
        while True:
            clear_screen()
-
            limits = self.concurrency_mgr.get_current_limits()
-
-           self.console.print(Panel.fit(
-               "[bold cyan]⚡ Concurrency Limits Configuration[/bold cyan]",
-               border_style="cyan"
-           ))
-
            self.console.print()
            self.console.print("[bold]📋 Current Concurrency Settings[/bold]")
            self.console.print("━" * 70)
-
            if limits:
                for provider, limit in limits.items():
                    self.console.print(f" • {provider:15} {limit} requests/key")
                self.console.print(" • Default: 1 request/key (all others)")
            else:
                self.console.print(" • Default: 1 request/key (all providers)")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print()
@@ -1046,96 +1659,128 @@ class SettingsTool:
            self.console.print(" 2. ✏️ Edit Existing Limit")
            self.console.print(" 3. 🗑️ Remove Limit (reset to default)")
            self.console.print(" 4. ↩️ Back to Settings Menu")
-
            self.console.print()
            self.console.print("━" * 70)
            self.console.print()
-
-           choice = Prompt.ask("Select option", choices=["1", "2", "3", "4"], show_choices=False)
-
            if choice == "1":
                # Get available providers
                available_providers = self.get_available_providers()
-
                if not available_providers:
-                   self.console.print("\n[yellow]No providers with credentials found. Please add credentials first.[/yellow]")
                    input("\nPress Enter to continue...")
                    continue
-
                # Show provider selection menu
                self.console.print("\n[bold]Select provider:[/bold]")
                for idx, prov in enumerate(available_providers, 1):
                    self.console.print(f" {idx}. {prov}")
-               self.console.print(f" {len(available_providers) + 1}. Enter custom provider name")
-
-               choice_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(available_providers) + 2)])
-
                if choice_idx == len(available_providers) + 1:
                    provider = Prompt.ask("Provider name").strip().lower()
                else:
                    provider = available_providers[choice_idx - 1]
-
                if provider:
-                   limit = IntPrompt.ask("Max concurrent requests per key (1-100)", default=1)
                    if 1 <= limit <= 100:
                        self.concurrency_mgr.set_limit(provider, limit)
-                       self.console.print(f"\n[green]✅ Concurrency limit set for '{provider}': {limit} requests/key[/green]")
                    else:
-                       self.console.print("\n[red]❌ Limit must be between 1-100[/red]")
                input("\nPress Enter to continue...")
-
            elif choice == "2":
                if not limits:
                    self.console.print("\n[yellow]No limits to edit[/yellow]")
                    input("\nPress Enter to continue...")
                    continue
-
                # Show numbered list
                self.console.print("\n[bold]Select provider to edit:[/bold]")
                limits_list = list(limits.keys())
                for idx, prov in enumerate(limits_list, 1):
                    self.console.print(f" {idx}. {prov}")
-
-               choice_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(limits_list) + 1)])
                provider = limits_list[choice_idx - 1]
                current_limit = limits.get(provider, 1)
-
                self.console.print(f"\nCurrent limit: {current_limit} requests/key")
-               new_limit = IntPrompt.ask("New limit (1-100) [press Enter to keep current]", default=current_limit)
-
                if 1 <= new_limit <= 100:
                    if new_limit != current_limit:
                        self.concurrency_mgr.set_limit(provider, new_limit)
-                       self.console.print(f"\n[green]✅ Concurrency limit updated for '{provider}': {new_limit} requests/key[/green]")
                    else:
                        self.console.print("\n[yellow]No changes made[/yellow]")
                else:
                    self.console.print("\n[red]Limit must be between 1-100[/red]")
                input("\nPress Enter to continue...")
-
            elif choice == "3":
                if not limits:
                    self.console.print("\n[yellow]No limits to remove[/yellow]")
                    input("\nPress Enter to continue...")
                    continue
-
                # Show numbered list
-               self.console.print("\n[bold]Select provider to remove limit from:[/bold]")
                limits_list = list(limits.keys())
                for idx, prov in enumerate(limits_list, 1):
                    self.console.print(f" {idx}. {prov}")
-
-               choice_idx = IntPrompt.ask("Select option", choices=[str(i) for i in range(1, len(limits_list) + 1)])
                provider = limits_list[choice_idx - 1]
-
-               if Confirm.ask(f"Remove concurrency limit for '{provider}' (reset to default 1)?"):
                    self.concurrency_mgr.remove_limit(provider)
-                   self.console.print(f"\n[green]✅ Limit removed for '{provider}' - using default (1 request/key)[/green]")
                input("\nPress Enter to continue...")
-
            elif choice == "4":
                break
-
    def save_and_exit(self):
        """Save pending changes and exit"""
        if self.settings.has_pending():
@@ -1150,9 +1795,9 @@ class SettingsTool:
         else:
             self.console.print("\n[dim]No changes to save[/dim]")
         input("\nPress Enter to return to launcher...")
-
         self.running = False
-
     def exit_without_saving(self):
         """Exit without saving"""
         if self.settings.has_pending():
 
 def clear_screen():
     """
+    Cross-platform terminal clear that works robustly on both
     classic Windows conhost and modern terminals (Windows Terminal, Linux, Mac).
+
     Uses native OS commands instead of ANSI escape sequences:
     - Windows (conhost & Windows Terminal): cls
     - Unix-like systems (Linux, Mac): clear
     """
+    os.system("cls" if os.name == "nt" else "clear")
 
 
 class AdvancedSettings:
     """Manages pending changes to .env"""
+
     def __init__(self):
         self.env_file = Path.cwd() / ".env"
         self.pending_changes = {}  # key -> value (None means delete)
         self.load_current_settings()
+
     def load_current_settings(self):
         """Load current .env values into env vars"""
         from dotenv import load_dotenv
+
         load_dotenv(override=True)
+
     def set(self, key: str, value: str):
         """Stage a change"""
         self.pending_changes[key] = value
+
     def remove(self, key: str):
         """Stage a removal"""
         self.pending_changes[key] = None
+
     def save(self):
         """Write pending changes to .env"""
         for key, value in self.pending_changes.items():

         else:
             # Set key
             set_key(str(self.env_file), key, value)
+
         self.pending_changes.clear()
         self.load_current_settings()
+
     def discard(self):
         """Discard pending changes"""
         self.pending_changes.clear()
+
     def has_pending(self) -> bool:
         """Check if there are pending changes"""
         return bool(self.pending_changes)
 
 
 class CustomProviderManager:
     """Manages custom provider API bases"""
+
     def __init__(self, settings: AdvancedSettings):
         self.settings = settings
+
     def get_current_providers(self) -> Dict[str, str]:
         """Get currently configured custom providers"""
         from proxy_app.provider_urls import PROVIDER_URL_MAP
+
         providers = {}
         for key, value in os.environ.items():
             if key.endswith("_API_BASE"):

                 if provider not in PROVIDER_URL_MAP:
                     providers[provider] = value
         return providers
+
     def add_provider(self, name: str, api_base: str):
         """Add PROVIDER_API_BASE"""
         key = f"{name.upper()}_API_BASE"
         self.settings.set(key, api_base)
+
     def edit_provider(self, name: str, api_base: str):
         """Edit PROVIDER_API_BASE"""
         self.add_provider(name, api_base)
+
     def remove_provider(self, name: str):
         """Remove PROVIDER_API_BASE"""
         key = f"{name.upper()}_API_BASE"
 
 
 class ModelDefinitionManager:
     """Manages PROVIDER_MODELS"""
+
     def __init__(self, settings: AdvancedSettings):
         self.settings = settings
+
     def get_current_provider_models(self, provider: str) -> Optional[Dict]:
         """Get currently configured models for a provider"""
         key = f"{provider.upper()}_MODELS"

         except (json.JSONDecodeError, ValueError):
             return None
         return None
+
     def get_all_providers_with_models(self) -> Dict[str, int]:
         """Get all providers with model definitions"""
         providers = {}

         except (json.JSONDecodeError, ValueError):
             pass
         return providers
+
     def set_models(self, provider: str, models: Dict[str, Dict[str, Any]]):
         """Set PROVIDER_MODELS"""
         key = f"{provider.upper()}_MODELS"
         value = json.dumps(models)
         self.settings.set(key, value)
+
     def remove_models(self, provider: str):
         """Remove PROVIDER_MODELS"""
         key = f"{provider.upper()}_MODELS"
 
 
 class ConcurrencyManager:
     """Manages MAX_CONCURRENT_REQUESTS_PER_KEY_PROVIDER"""
+
     def __init__(self, settings: AdvancedSettings):
         self.settings = settings
+
     def get_current_limits(self) -> Dict[str, int]:
         """Get currently configured concurrency limits"""
         limits = {}

         except (json.JSONDecodeError, ValueError):
             pass
         return limits
+
     def set_limit(self, provider: str, limit: int):
         """Set concurrency limit"""
         key = f"MAX_CONCURRENT_REQUESTS_PER_KEY_{provider.upper()}"
         self.settings.set(key, str(limit))
+
     def remove_limit(self, provider: str):
         """Remove concurrency limit (reset to default)"""
         key = f"MAX_CONCURRENT_REQUESTS_PER_KEY_{provider.upper()}"
         self.settings.remove(key)
 
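The manager classes in this hunk all stage their edits through `AdvancedSettings`, which keeps pending key/value changes in memory (with `None` marking a deletion) and only touches `.env` when the user saves. A stdlib-only sketch of that stage-then-save pattern; the `PendingEnv` class and its backing dict are illustrative stand-ins, not the module's actual API:

```python
# Minimal sketch of the staging pattern: edits accumulate in a dict,
# None marks a deletion, and the backing store is untouched until save().
class PendingEnv:
    def __init__(self, initial):
        self.values = dict(initial)  # stands in for the saved .env contents
        self.pending = {}            # key -> value, None means delete

    def set(self, key, value):
        self.pending[key] = value

    def remove(self, key):
        self.pending[key] = None

    def has_pending(self):
        return bool(self.pending)

    def save(self):
        for key, value in self.pending.items():
            if value is None:
                self.values.pop(key, None)
            else:
                self.values[key] = value
        self.pending.clear()


env = PendingEnv({"ROTATION_MODE_GEMINI": "balanced"})
env.set("ROTATION_MODE_GEMINI", "sequential")
env.remove("MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI")
assert env.values["ROTATION_MODE_GEMINI"] == "balanced"  # unchanged until save
env.save()
assert env.values["ROTATION_MODE_GEMINI"] == "sequential"
```

This mirrors why the TUI can offer both "Save & Exit" and "Exit Without Saving": discarding is just clearing the pending dict.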
 
+class RotationModeManager:
+    """Manages ROTATION_MODE_PROVIDER settings for sequential/balanced credential rotation"""
+
+    VALID_MODES = ["balanced", "sequential"]
+
+    def __init__(self, settings: AdvancedSettings):
+        self.settings = settings
+
+    def get_current_modes(self) -> Dict[str, str]:
+        """Get currently configured rotation modes"""
+        modes = {}
+        for key, value in os.environ.items():
+            if key.startswith("ROTATION_MODE_"):
+                provider = key.replace("ROTATION_MODE_", "").lower()
+                if value.lower() in self.VALID_MODES:
+                    modes[provider] = value.lower()
+        return modes
+
+    def get_default_mode(self, provider: str) -> str:
+        """Get the default rotation mode for a provider"""
+        try:
+            from rotator_library.providers import PROVIDER_PLUGINS
+
+            provider_class = PROVIDER_PLUGINS.get(provider.lower())
+            if provider_class and hasattr(provider_class, "default_rotation_mode"):
+                return provider_class.default_rotation_mode
+            return "balanced"
+        except ImportError:
+            # Fallback defaults if import fails
+            if provider.lower() == "antigravity":
+                return "sequential"
+            return "balanced"
+
+    def get_effective_mode(self, provider: str) -> str:
+        """Get the effective rotation mode (configured or default)"""
+        configured = self.get_current_modes().get(provider.lower())
+        if configured:
+            return configured
+        return self.get_default_mode(provider)
+
+    def set_mode(self, provider: str, mode: str):
+        """Set rotation mode for a provider"""
+        if mode.lower() not in self.VALID_MODES:
+            raise ValueError(
+                f"Invalid rotation mode: {mode}. Must be one of {self.VALID_MODES}"
+            )
+        key = f"ROTATION_MODE_{provider.upper()}"
+        self.settings.set(key, mode.lower())
+
+    def remove_mode(self, provider: str):
+        """Remove rotation mode (reset to provider default)"""
+        key = f"ROTATION_MODE_{provider.upper()}"
+        self.settings.remove(key)
+
+
+class PriorityMultiplierManager:
+    """Manages CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N> settings"""
+
+    def __init__(self, settings: AdvancedSettings):
+        self.settings = settings
+
+    def get_provider_defaults(self, provider: str) -> Dict[int, int]:
+        """Get default priority multipliers from provider class"""
+        try:
+            from rotator_library.providers import PROVIDER_PLUGINS
+
+            provider_class = PROVIDER_PLUGINS.get(provider.lower())
+            if provider_class and hasattr(
+                provider_class, "default_priority_multipliers"
+            ):
+                return dict(provider_class.default_priority_multipliers)
+        except ImportError:
+            pass
+        return {}
+
+    def get_sequential_fallback(self, provider: str) -> int:
+        """Get sequential fallback multiplier from provider class"""
+        try:
+            from rotator_library.providers import PROVIDER_PLUGINS
+
+            provider_class = PROVIDER_PLUGINS.get(provider.lower())
+            if provider_class and hasattr(
+                provider_class, "default_sequential_fallback_multiplier"
+            ):
+                return provider_class.default_sequential_fallback_multiplier
+        except ImportError:
+            pass
+        return 1
+
+    def get_current_multipliers(self) -> Dict[str, Dict[int, int]]:
+        """Get currently configured priority multipliers from env vars"""
+        multipliers: Dict[str, Dict[int, int]] = {}
+        for key, value in os.environ.items():
+            if key.startswith("CONCURRENCY_MULTIPLIER_") and "_PRIORITY_" in key:
+                try:
+                    # Parse: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>
+                    parts = key.split("_PRIORITY_")
+                    provider = parts[0].replace("CONCURRENCY_MULTIPLIER_", "").lower()
+                    remainder = parts[1]
+
+                    # Check if mode-specific (has _SEQUENTIAL or _BALANCED suffix)
+                    if "_" in remainder:
+                        continue  # Skip mode-specific for now (show in separate view)
+
+                    priority = int(remainder)
+                    multiplier = int(value)
+
+                    if provider not in multipliers:
+                        multipliers[provider] = {}
+                    multipliers[provider][priority] = multiplier
+                except (ValueError, IndexError):
+                    pass
+        return multipliers
+
+    def get_effective_multiplier(self, provider: str, priority: int) -> int:
+        """Get effective multiplier (configured, provider default, or 1)"""
+        # Check env var override
+        current = self.get_current_multipliers()
+        if provider.lower() in current:
+            if priority in current[provider.lower()]:
+                return current[provider.lower()][priority]
+
+        # Check provider defaults
+        defaults = self.get_provider_defaults(provider)
+        if priority in defaults:
+            return defaults[priority]
+
+        # Return 1 (no multiplier)
+        return 1
+
+    def set_multiplier(self, provider: str, priority: int, multiplier: int):
+        """Set priority multiplier for a provider"""
+        if multiplier < 1:
+            raise ValueError("Multiplier must be >= 1")
+        key = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_{priority}"
+        self.settings.set(key, str(multiplier))
+
+    def remove_multiplier(self, provider: str, priority: int):
+        """Remove multiplier (reset to provider default)"""
+        key = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_{priority}"
+        self.settings.remove(key)
+
+
 # =============================================================================
 # PROVIDER-SPECIFIC SETTINGS DEFINITIONS
 # =============================================================================
 
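The `PriorityMultiplierManager` added above resolves a multiplier in three steps: an explicit `CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>` env var wins, then the provider class's default table, then 1. A hedged, self-contained sketch of that parsing and precedence (the free functions below are illustrative stand-ins for the class methods):

```python
# Parse CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N> keys from an environ
# mapping; mode-specific variants (extra _SEQUENTIAL/_BALANCED suffix) are
# skipped here, as in the class above.
def parse_multipliers(environ):
    multipliers = {}
    for key, value in environ.items():
        if key.startswith("CONCURRENCY_MULTIPLIER_") and "_PRIORITY_" in key:
            prefix, _, remainder = key.partition("_PRIORITY_")
            provider = prefix[len("CONCURRENCY_MULTIPLIER_"):].lower()
            if "_" in remainder:  # mode-specific variant, handled elsewhere
                continue
            multipliers.setdefault(provider, {})[int(remainder)] = int(value)
    return multipliers


def effective_multiplier(environ, provider_defaults, provider, priority):
    # 1) env var override, 2) provider default table, 3) fall back to 1
    configured = parse_multipliers(environ).get(provider.lower(), {})
    if priority in configured:
        return configured[priority]
    return provider_defaults.get(priority, 1)


env = {"CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1": "4"}
defaults = {1: 2, 2: 2}  # hypothetical per-provider default table
assert effective_multiplier(env, defaults, "antigravity", 1) == 4  # env wins
assert effective_multiplier(env, defaults, "antigravity", 2) == 2  # default
assert effective_multiplier(env, defaults, "antigravity", 3) == 1  # fallback
```

The same precedence order is what lets the TUI show "configured" vs "default" values without mutating anything.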
 
 class ProviderSettingsManager:
     """Manages provider-specific configuration settings"""
+
     def __init__(self, settings: AdvancedSettings):
         self.settings = settings
+
     def get_available_providers(self) -> List[str]:
         """Get list of providers with specific settings available"""
         return list(PROVIDER_SETTINGS_MAP.keys())
+
+    def get_provider_settings_definitions(
+        self, provider
+    ) -> Dict[str, Dict[str, Any]]:
         """Get settings definitions for a provider"""
         return PROVIDER_SETTINGS_MAP.get(provider, {})
+
     def get_current_value(self, key: str, definition: Dict[str, Any]) -> Any:
         """Get current value of a setting from environment"""
         env_value = os.getenv(key)
         if env_value is None:
             return definition.get("default")
+
         setting_type = definition.get("type", "str")
         try:
             if setting_type == "bool":

             return env_value
         except (ValueError, AttributeError):
             return definition.get("default")
+
     def get_all_current_values(self, provider: str) -> Dict[str, Any]:
         """Get all current values for a provider"""
         definitions = self.get_provider_settings_definitions(provider)

         for key, definition in definitions.items():
             values[key] = self.get_current_value(key, definition)
         return values
+
     def set_value(self, key: str, value: Any, definition: Dict[str, Any]):
         """Set a setting value, converting to string for .env storage"""
         setting_type = definition.get("type", "str")

         else:
             str_value = str(value)
         self.settings.set(key, str_value)
+
     def reset_to_default(self, key: str):
         """Remove a setting to reset it to default"""
         self.settings.remove(key)
+
     def get_modified_settings(self, provider: str) -> Dict[str, Any]:
         """Get settings that differ from defaults"""
         definitions = self.get_provider_settings_definitions(provider)
504
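`get_current_value` above coerces the raw `.env` string according to the setting's declared type, falling back to the definition's default on bad input. A sketch of that coercion; note the bool branch is collapsed in this diff, so the exact set of accepted truthy strings below is an assumption, not the repository's implementation:

```python
# Illustrative type coercion for .env-backed settings: strings come in,
# typed values come out, and malformed input falls back to the default.
def coerce(env_value, definition):
    if env_value is None:
        return definition.get("default")
    setting_type = definition.get("type", "str")
    try:
        if setting_type == "bool":
            # Assumed truthy spellings; the diff hides the real list.
            return env_value.strip().lower() in ("true", "1", "yes", "on")
        if setting_type == "int":
            return int(env_value)
        return env_value
    except (ValueError, AttributeError):
        return definition.get("default")


assert coerce("TRUE", {"type": "bool", "default": False}) is True
assert coerce("abc", {"type": "int", "default": 5}) == 5   # bad int -> default
assert coerce(None, {"type": "int", "default": 3}) == 3    # unset -> default
```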
 
505
  class SettingsTool:
506
  """Main settings tool TUI"""
507
+
508
  def __init__(self):
509
  self.console = Console()
510
  self.settings = AdvancedSettings()
511
  self.provider_mgr = CustomProviderManager(self.settings)
512
  self.model_mgr = ModelDefinitionManager(self.settings)
513
  self.concurrency_mgr = ConcurrencyManager(self.settings)
514
+ self.rotation_mgr = RotationModeManager(self.settings)
515
+ self.priority_multiplier_mgr = PriorityMultiplierManager(self.settings)
516
  self.provider_settings_mgr = ProviderSettingsManager(self.settings)
517
  self.running = True
518
+
519
  def get_available_providers(self) -> List[str]:
520
  """Get list of providers that have credentials configured"""
521
  env_file = Path.cwd() / ".env"
522
  providers = set()
523
+
524
  # Scan for providers with API keys from local .env
525
  if env_file.exists():
526
  try:
527
+ with open(env_file, "r", encoding="utf-8") as f:
528
  for line in f:
529
  line = line.strip()
530
+ # Skip comments and empty lines
531
+ if not line or line.startswith("#"):
532
+ continue
533
+ if (
534
+ "_API_KEY" in line
535
+ and "PROXY_API_KEY" not in line
536
+ and "=" in line
537
+ ):
538
  provider = line.split("_API_KEY")[0].strip().lower()
539
  providers.add(provider)
540
  except (IOError, OSError):
541
  pass
542
+
543
  # Also check for OAuth providers from files
544
+ oauth_dir = Path("oauth_creds")
545
  if oauth_dir.exists():
546
  for file in oauth_dir.glob("*_oauth_*.json"):
547
  provider = file.name.split("_oauth_")[0]
548
  providers.add(provider)
549
+
550
  return sorted(list(providers))
551
 
552
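`get_available_providers` above derives provider names by scanning `.env` for `*_API_KEY` entries, skipping comments and the proxy's own `PROXY_API_KEY`. A small self-contained sketch of that scan (the helper name is illustrative):

```python
# Derive provider names from *_API_KEY lines, mirroring the .env scan above:
# comments and blank lines are skipped, PROXY_API_KEY is excluded, and
# everything before "_API_KEY" becomes the lowercase provider name.
def providers_from_env_lines(lines):
    providers = set()
    for line in lines:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        if "_API_KEY" in line and "PROXY_API_KEY" not in line and "=" in line:
            providers.add(line.split("_API_KEY")[0].strip().lower())
    return sorted(providers)


lines = [
    "# comment",
    "GEMINI_API_KEY=abc",
    "PROXY_API_KEY=secret",
    "OPENROUTER_API_KEY_1=xyz",
]
assert providers_from_env_lines(lines) == ["gemini", "openrouter"]
```

Note how numbered keys (`OPENROUTER_API_KEY_1`) collapse to one provider, because only the text before `_API_KEY` is kept.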
  def run(self):
553
  """Main loop"""
554
  while self.running:
555
  self.show_main_menu()
556
+
557
  def show_main_menu(self):
558
  """Display settings categories"""
559
  clear_screen()
560
+
561
+ self.console.print(
562
+ Panel.fit(
563
+ "[bold cyan]🔧 Advanced Settings Configuration[/bold cyan]",
564
+ border_style="cyan",
565
+ )
566
+ )
567
+
568
  self.console.print()
569
  self.console.print("[bold]⚙️ Configuration Categories[/bold]")
570
  self.console.print()
571
  self.console.print(" 1. 🌐 Custom Provider API Bases")
572
  self.console.print(" 2. 📦 Provider Model Definitions")
573
  self.console.print(" 3. ⚡ Concurrency Limits")
574
+ self.console.print(" 4. 🔄 Rotation Modes")
575
+ self.console.print(" 5. 🔬 Provider-Specific Settings")
576
+ self.console.print(" 6. 💾 Save & Exit")
577
+ self.console.print(" 7. 🚫 Exit Without Saving")
578
+
579
  self.console.print()
580
  self.console.print("━" * 70)
581
+
582
  if self.settings.has_pending():
583
+ self.console.print(
584
+ '[yellow]ℹ️ Changes are pending until you select "Save & Exit"[/yellow]'
585
+ )
586
  else:
587
  self.console.print("[dim]ℹ️ No pending changes[/dim]")
588
+
589
  self.console.print()
590
+ self.console.print(
591
+ "[dim]⚠️ Model filters not supported - edit .env for IGNORE_MODELS_* / WHITELIST_MODELS_*[/dim]"
592
+ )
593
  self.console.print()
594
+
595
+ choice = Prompt.ask(
596
+ "Select option",
597
+ choices=["1", "2", "3", "4", "5", "6", "7"],
598
+ show_choices=False,
599
+ )
600
+
601
  if choice == "1":
602
  self.manage_custom_providers()
603
  elif choice == "2":
 
605
  elif choice == "3":
606
  self.manage_concurrency_limits()
607
  elif choice == "4":
608
+ self.manage_rotation_modes()
609
  elif choice == "5":
610
+ self.manage_provider_settings()
611
  elif choice == "6":
612
+ self.save_and_exit()
613
+ elif choice == "7":
614
  self.exit_without_saving()
615
+
616
  def manage_custom_providers(self):
617
  """Manage custom provider API bases"""
618
  while True:
619
  clear_screen()
620
+
621
  providers = self.provider_mgr.get_current_providers()
622
+
623
+ self.console.print(
624
+ Panel.fit(
625
+ "[bold cyan]🌐 Custom Provider API Bases[/bold cyan]",
626
+ border_style="cyan",
627
+ )
628
+ )
629
+
630
  self.console.print()
631
  self.console.print("[bold]📋 Configured Custom Providers[/bold]")
632
  self.console.print("━" * 70)
633
+
634
  if providers:
635
  for name, base in providers.items():
636
  self.console.print(f" • {name:15} {base}")
637
  else:
638
  self.console.print(" [dim]No custom providers configured[/dim]")
639
+
640
  self.console.print()
641
  self.console.print("━" * 70)
642
  self.console.print()
 
646
  self.console.print(" 2. ✏️ Edit Existing Provider")
647
  self.console.print(" 3. 🗑️ Remove Provider")
648
  self.console.print(" 4. ↩️ Back to Settings Menu")
649
+
650
  self.console.print()
651
  self.console.print("━" * 70)
652
  self.console.print()
653
+
654
+ choice = Prompt.ask(
655
+ "Select option", choices=["1", "2", "3", "4"], show_choices=False
656
+ )
657
+
658
  if choice == "1":
659
  name = Prompt.ask("Provider name (e.g., 'opencode')").strip().lower()
660
  if name:
661
  api_base = Prompt.ask("API Base URL").strip()
662
  if api_base:
663
  self.provider_mgr.add_provider(name, api_base)
664
+ self.console.print(
665
+ f"\n[green]✅ Custom provider '{name}' configured![/green]"
666
+ )
667
+ self.console.print(
668
+ f" To use: set {name.upper()}_API_KEY in credentials"
669
+ )
670
  input("\nPress Enter to continue...")
671
+
672
  elif choice == "2":
673
  if not providers:
674
  self.console.print("\n[yellow]No providers to edit[/yellow]")
675
  input("\nPress Enter to continue...")
676
  continue
677
+
678
  # Show numbered list
679
  self.console.print("\n[bold]Select provider to edit:[/bold]")
680
  providers_list = list(providers.keys())
681
  for idx, prov in enumerate(providers_list, 1):
682
  self.console.print(f" {idx}. {prov}")
683
+
684
+ choice_idx = IntPrompt.ask(
685
+ "Select option",
686
+ choices=[str(i) for i in range(1, len(providers_list) + 1)],
687
+ )
688
  name = providers_list[choice_idx - 1]
689
  current_base = providers.get(name, "")
690
+
691
  self.console.print(f"\nCurrent API Base: {current_base}")
692
+ new_base = Prompt.ask(
693
+ "New API Base [press Enter to keep current]", default=current_base
694
+ ).strip()
695
+
696
  if new_base and new_base != current_base:
697
  self.provider_mgr.edit_provider(name, new_base)
698
+ self.console.print(
699
+ f"\n[green]✅ Custom provider '{name}' updated![/green]"
700
+ )
701
  else:
702
  self.console.print("\n[yellow]No changes made[/yellow]")
703
  input("\nPress Enter to continue...")
704
+
705
  elif choice == "3":
706
  if not providers:
707
  self.console.print("\n[yellow]No providers to remove[/yellow]")
708
  input("\nPress Enter to continue...")
709
  continue
710
+
711
  # Show numbered list
712
  self.console.print("\n[bold]Select provider to remove:[/bold]")
713
  providers_list = list(providers.keys())
714
  for idx, prov in enumerate(providers_list, 1):
715
  self.console.print(f" {idx}. {prov}")
716
+
717
+ choice_idx = IntPrompt.ask(
718
+ "Select option",
719
+ choices=[str(i) for i in range(1, len(providers_list) + 1)],
720
+ )
721
  name = providers_list[choice_idx - 1]
722
+
723
  if Confirm.ask(f"Remove '{name}'?"):
724
  self.provider_mgr.remove_provider(name)
725
+ self.console.print(
726
+ f"\n[green]✅ Provider '{name}' removed![/green]"
727
+ )
728
  input("\nPress Enter to continue...")
729
+
730
  elif choice == "4":
731
  break
732
+
733
  def manage_model_definitions(self):
734
  """Manage provider model definitions"""
735
  while True:
736
  clear_screen()
737
+
738
  all_providers = self.model_mgr.get_all_providers_with_models()
739
+
740
+ self.console.print(
741
+ Panel.fit(
742
+ "[bold cyan]📦 Provider Model Definitions[/bold cyan]",
743
+ border_style="cyan",
744
+ )
745
+ )
746
+
747
  self.console.print()
748
  self.console.print("[bold]📋 Configured Provider Models[/bold]")
749
  self.console.print("━" * 70)
750
+
751
  if all_providers:
752
  for provider, count in all_providers.items():
753
+ self.console.print(
754
+ f" • {provider:15} {count} model{'s' if count > 1 else ''}"
755
+ )
756
  else:
757
  self.console.print(" [dim]No model definitions configured[/dim]")
758
+
759
  self.console.print()
760
  self.console.print("━" * 70)
761
  self.console.print()
 
766
  self.console.print(" 3. 👁️ View Provider Models")
767
  self.console.print(" 4. 🗑️ Remove Provider Models")
768
  self.console.print(" 5. ↩️ Back to Settings Menu")
769
+
770
  self.console.print()
771
  self.console.print("━" * 70)
772
  self.console.print()
773
+
774
+ choice = Prompt.ask(
775
+ "Select option", choices=["1", "2", "3", "4", "5"], show_choices=False
776
+ )
777
+
778
  if choice == "1":
779
  self.add_model_definitions()
780
  elif choice == "2":
 
794
  self.console.print("\n[yellow]No providers to remove[/yellow]")
795
  input("\nPress Enter to continue...")
796
  continue
797
+
798
  # Show numbered list
799
+ self.console.print(
800
+ "\n[bold]Select provider to remove models from:[/bold]"
801
+ )
802
  providers_list = list(all_providers.keys())
803
  for idx, prov in enumerate(providers_list, 1):
804
  self.console.print(f" {idx}. {prov}")
805
+
806
+ choice_idx = IntPrompt.ask(
807
+ "Select option",
808
+ choices=[str(i) for i in range(1, len(providers_list) + 1)],
809
+ )
810
  provider = providers_list[choice_idx - 1]
811
+
812
  if Confirm.ask(f"Remove all model definitions for '{provider}'?"):
813
  self.model_mgr.remove_models(provider)
814
+ self.console.print(
815
+ f"\n[green]✅ Model definitions removed for '{provider}'![/green]"
816
+ )
817
  input("\nPress Enter to continue...")
818
  elif choice == "5":
819
  break
820
+
821
  def add_model_definitions(self):
822
  """Add model definitions for a provider"""
823
  # Get available providers from credentials
824
  available_providers = self.get_available_providers()
825
+
826
  if not available_providers:
827
+ self.console.print(
828
+ "\n[yellow]No providers with credentials found. Please add credentials first.[/yellow]"
829
+ )
830
  input("\nPress Enter to continue...")
831
  return
832
+
833
  # Show provider selection menu
834
  self.console.print("\n[bold]Select provider:[/bold]")
835
  for idx, prov in enumerate(available_providers, 1):
836
  self.console.print(f" {idx}. {prov}")
837
+ self.console.print(
838
+ f" {len(available_providers) + 1}. Enter custom provider name"
839
+ )
840
+
841
+ choice = IntPrompt.ask(
842
+ "Select option",
843
+ choices=[str(i) for i in range(1, len(available_providers) + 2)],
844
+ )
845
+
846
  if choice == len(available_providers) + 1:
847
  provider = Prompt.ask("Provider name").strip().lower()
848
  else:
849
  provider = available_providers[choice - 1]
850
+
851
  if not provider:
852
  return
853
+
854
  self.console.print("\nHow would you like to define models?")
855
  self.console.print(" 1. Simple list (names only)")
856
  self.console.print(" 2. Advanced (names with IDs and options)")
857
+
858
  mode = Prompt.ask("Select mode", choices=["1", "2"], show_choices=False)
859
+
860
  models = {}
861
+
862
  if mode == "1":
863
  # Simple mode
864
  while True:
 
875
  break
876
  if name:
877
  model_def = {}
878
+ model_id = Prompt.ask(
879
+ f"Model ID [press Enter to use '{name}']", default=name
880
+ ).strip()
881
  if model_id and model_id != name:
882
  model_def["id"] = model_id
883
+
884
  # Optional: model options
885
+ if Confirm.ask(
886
+ "Add model options (e.g., temperature limits)?", default=False
887
+ ):
888
+ self.console.print(
889
+ "\nEnter options as key=value pairs (one per line, 'done' to finish):"
890
+ )
891
  options = {}
892
  while True:
893
  opt = Prompt.ask("Option").strip()
 
904
  options[key.strip()] = value
905
  if options:
906
  model_def["options"] = options
907
+
908
  models[name] = model_def
909
+
910
  if models:
911
  self.model_mgr.set_models(provider, models)
912
+ self.console.print(
913
+ f"\n[green]✅ Model definitions saved for '{provider}'![/green]"
914
+ )
915
  else:
916
  self.console.print("\n[yellow]No models added[/yellow]")
917
+
918
  input("\nPress Enter to continue...")
919
+
920
  def edit_model_definitions(self, providers: List[str]):
921
  """Edit existing model definitions"""
922
  # Show numbered list
923
  self.console.print("\n[bold]Select provider to edit:[/bold]")
924
  for idx, prov in enumerate(providers, 1):
925
  self.console.print(f" {idx}. {prov}")
926
+
927
+ choice_idx = IntPrompt.ask(
928
+ "Select option", choices=[str(i) for i in range(1, len(providers) + 1)]
929
+ )
930
  provider = providers[choice_idx - 1]
931
+
932
  current_models = self.model_mgr.get_current_provider_models(provider)
933
  if not current_models:
934
  self.console.print(f"\n[yellow]No models found for '{provider}'[/yellow]")
935
  input("\nPress Enter to continue...")
936
  return
937
+
938
  # Convert to dict if list
939
  if isinstance(current_models, list):
940
  current_models = {m: {} for m in current_models}
941
+
942
  while True:
943
  clear_screen()
944
  self.console.print(f"[bold]Editing models for: {provider}[/bold]\n")
945
  self.console.print("Current models:")
946
  for i, (name, definition) in enumerate(current_models.items(), 1):
947
+ model_id = (
948
+ definition.get("id", name) if isinstance(definition, dict) else name
949
+ )
950
  self.console.print(f" {i}. {name} (ID: {model_id})")
951
+
952
  self.console.print("\nOptions:")
953
  self.console.print(" 1. Add new model")
954
  self.console.print(" 2. Edit existing model")
955
  self.console.print(" 3. Remove model")
956
  self.console.print(" 4. Done")
957
+
958
+ choice = Prompt.ask(
959
+ "\nSelect option", choices=["1", "2", "3", "4"], show_choices=False
960
+ )
961
+
962
  if choice == "1":
963
  name = Prompt.ask("New model name").strip()
964
  if name and name not in current_models:
965
  model_id = Prompt.ask("Model ID", default=name).strip()
966
  current_models[name] = {"id": model_id} if model_id != name else {}
967
+
968
  elif choice == "2":
969
  # Show numbered list
970
  models_list = list(current_models.keys())
971
  self.console.print("\n[bold]Select model to edit:[/bold]")
972
  for idx, model_name in enumerate(models_list, 1):
973
  self.console.print(f" {idx}. {model_name}")
974
+
975
+ model_idx = IntPrompt.ask(
976
+ "Select option",
977
+ choices=[str(i) for i in range(1, len(models_list) + 1)],
978
+ )
979
  name = models_list[model_idx - 1]
980
+
981
  current_def = current_models[name]
982
+ current_id = (
983
+ current_def.get("id", name)
984
+ if isinstance(current_def, dict)
985
+ else name
986
+ )
987
+
988
  new_id = Prompt.ask("Model ID", default=current_id).strip()
989
  current_models[name] = {"id": new_id} if new_id != name else {}
990
+
991
  elif choice == "3":
992
  # Show numbered list
993
  models_list = list(current_models.keys())
994
  self.console.print("\n[bold]Select model to remove:[/bold]")
995
  for idx, model_name in enumerate(models_list, 1):
996
  self.console.print(f" {idx}. {model_name}")
997
+
998
+ model_idx = IntPrompt.ask(
999
+ "Select option",
1000
+ choices=[str(i) for i in range(1, len(models_list) + 1)],
1001
+ )
1002
  name = models_list[model_idx - 1]
1003
+
1004
  if Confirm.ask(f"Remove '{name}'?"):
1005
  del current_models[name]
1006
+
1007
  elif choice == "4":
1008
  break
1009
+
1010
  if current_models:
1011
  self.model_mgr.set_models(provider, current_models)
1012
  self.console.print(f"\n[green]✅ Models updated for '{provider}'![/green]")
1013
  else:
1014
+ self.console.print(
1015
+ "\n[yellow]No models left - removing definition[/yellow]"
1016
+ )
1017
  self.model_mgr.remove_models(provider)
1018
+
1019
  input("\nPress Enter to continue...")
1020
+
1021
  def view_model_definitions(self, providers: List[str]):
1022
  """View model definitions for a provider"""
1023
  # Show numbered list
1024
  self.console.print("\n[bold]Select provider to view:[/bold]")
1025
  for idx, prov in enumerate(providers, 1):
1026
  self.console.print(f" {idx}. {prov}")
1027
+
1028
+ choice_idx = IntPrompt.ask(
1029
+ "Select option", choices=[str(i) for i in range(1, len(providers) + 1)]
1030
+ )
1031
  provider = providers[choice_idx - 1]
1032
+
1033
  models = self.model_mgr.get_current_provider_models(provider)
1034
  if not models:
1035
  self.console.print(f"\n[yellow]No models found for '{provider}'[/yellow]")
1036
  input("\nPress Enter to continue...")
1037
  return
1038
+
1039
  clear_screen()
1040
  self.console.print(f"[bold]Provider: {provider}[/bold]\n")
1041
  self.console.print("[bold]📦 Configured Models:[/bold]")
1042
  self.console.print("━" * 50)
1043
+
1044
  # Handle both dict and list formats
1045
  if isinstance(models, dict):
1046
  for name, definition in models.items():
 
1058
  for name in models:
1059
  self.console.print(f" Name: {name}")
1060
  self.console.print()
1061
+
1062
  input("Press Enter to return...")
1063
+
1064
  def manage_provider_settings(self):
1065
  """Manage provider-specific settings (Antigravity, Gemini CLI)"""
1066
  while True:
1067
  clear_screen()
1068
+
1069
  available_providers = self.provider_settings_mgr.get_available_providers()
1070
+
1071
+ self.console.print(
1072
+ Panel.fit(
1073
+ "[bold cyan]🔬 Provider-Specific Settings[/bold cyan]",
1074
+ border_style="cyan",
1075
+ )
1076
+ )
1077
+
1078
  self.console.print()
1079
+ self.console.print(
1080
+ "[bold]📋 Available Providers with Custom Settings[/bold]"
1081
+ )
1082
  self.console.print("━" * 70)
1083
+
1084
  for provider in available_providers:
1085
  modified = self.provider_settings_mgr.get_modified_settings(provider)
1086
+ status = (
1087
+ f"[yellow]{len(modified)} modified[/yellow]"
1088
+ if modified
1089
+ else "[dim]defaults[/dim]"
1090
+ )
1091
  display_name = provider.replace("_", " ").title()
1092
  self.console.print(f" • {display_name:20} {status}")
1093
+
1094
  self.console.print()
1095
  self.console.print("━" * 70)
1096
  self.console.print()
1097
  self.console.print("[bold]⚙️ Select Provider to Configure[/bold]")
1098
  self.console.print()
1099
+
1100
  for idx, provider in enumerate(available_providers, 1):
1101
  display_name = provider.replace("_", " ").title()
1102
  self.console.print(f" {idx}. {display_name}")
1103
+ self.console.print(
1104
+ f" {len(available_providers) + 1}. ↩️ Back to Settings Menu"
1105
+ )
1106
+
1107
  self.console.print()
1108
  self.console.print("━" * 70)
1109
  self.console.print()
1110
+
1111
  choices = [str(i) for i in range(1, len(available_providers) + 2)]
1112
  choice = Prompt.ask("Select option", choices=choices, show_choices=False)
1113
  choice_idx = int(choice)
1114
+
1115
  if choice_idx == len(available_providers) + 1:
1116
  break
1117
+
1118
  provider = available_providers[choice_idx - 1]
1119
  self._manage_single_provider_settings(provider)
1120
+
1121
  def _manage_single_provider_settings(self, provider: str):
1122
  """Manage settings for a single provider"""
1123
  while True:
1124
  clear_screen()
1125
+
1126
  display_name = provider.replace("_", " ").title()
1127
+ definitions = self.provider_settings_mgr.get_provider_settings_definitions(
1128
+ provider
1129
+ )
1130
  current_values = self.provider_settings_mgr.get_all_current_values(provider)
1131
+
1132
+ self.console.print(
1133
+ Panel.fit(
1134
+ f"[bold cyan]🔬 {display_name} Settings[/bold cyan]",
1135
+ border_style="cyan",
1136
+ )
1137
+ )
1138
+
1139
  self.console.print()
1140
  self.console.print("[bold]📋 Current Settings[/bold]")
1141
  self.console.print("━" * 70)
1142
+
1143
  # Display all settings with current values
1144
  settings_list = list(definitions.keys())
1145
  for idx, key in enumerate(settings_list, 1):
 
1148
  default = definition.get("default")
1149
  setting_type = definition.get("type", "str")
1150
  description = definition.get("description", "")
1151
+
1152
  # Format value display
1153
  if setting_type == "bool":
1154
+ value_display = (
1155
+ "[green]✓ Enabled[/green]"
1156
+ if current
1157
+ else "[red]✗ Disabled[/red]"
1158
+ )
1159
  elif setting_type == "int":
1160
  value_display = f"[cyan]{current}[/cyan]"
1161
  else:
1162
+ value_display = (
1163
+ f"[cyan]{current or '(not set)'}[/cyan]"
1164
+ if current
1165
+ else "[dim](not set)[/dim]"
1166
+ )
1167
+
1168
  # Check if modified from default
1169
  modified = current != default
1170
  mod_marker = "[yellow]*[/yellow]" if modified else " "
1171
+
1172
  # Short key name for display (strip provider prefix)
1173
  short_key = key.replace(f"{provider.upper()}_", "")
1174
+
1175
+ self.console.print(
1176
+ f" {mod_marker}{idx:2}. {short_key:35} {value_display}"
1177
+ )
1178
  self.console.print(f" [dim]{description}[/dim]")
1179
+
1180
  self.console.print()
1181
  self.console.print("━" * 70)
1182
  self.console.print("[dim]* = modified from default[/dim]")
 
1187
  self.console.print(" R. 🔄 Reset Setting to Default")
1188
  self.console.print(" A. 🔄 Reset All to Defaults")
1189
  self.console.print(" B. ↩️ Back to Provider Selection")
1190
+
1191
  self.console.print()
1192
  self.console.print("━" * 70)
1193
  self.console.print()
1194
+
1195
+ choice = Prompt.ask(
1196
+ "Select action",
1197
+ choices=["e", "r", "a", "b", "E", "R", "A", "B"],
1198
+ show_choices=False,
1199
+ ).lower()
1200
+
1201
  if choice == "b":
1202
  break
1203
  elif choice == "e":
 
1206
  self._reset_provider_setting(provider, settings_list, definitions)
1207
  elif choice == "a":
1208
  self._reset_all_provider_settings(provider, settings_list)
1209
+
1210
+ def _edit_provider_setting(
1211
+ self,
1212
+ provider: str,
1213
+ settings_list: List[str],
1214
+ definitions: Dict[str, Dict[str, Any]],
1215
+ ):
1216
  """Edit a single provider setting"""
1217
  self.console.print("\n[bold]Select setting number to edit:[/bold]")
1218
+
1219
  choices = [str(i) for i in range(1, len(settings_list) + 1)]
1220
  choice = IntPrompt.ask("Setting number", choices=choices)
1221
  key = settings_list[choice - 1]
1222
  definition = definitions[key]
1223
+
1224
  current = self.provider_settings_mgr.get_current_value(key, definition)
1225
  default = definition.get("default")
1226
  setting_type = definition.get("type", "str")
1227
  short_key = key.replace(f"{provider.upper()}_", "")
1228
+
1229
  self.console.print(f"\n[bold]Editing: {short_key}[/bold]")
1230
  self.console.print(f"Current value: [cyan]{current}[/cyan]")
1231
  self.console.print(f"Default value: [dim]{default}[/dim]")
1232
  self.console.print(f"Type: {setting_type}")
1233
+
1234
  if setting_type == "bool":
1235
  new_value = Confirm.ask("\nEnable this setting?", default=current)
1236
  self.provider_settings_mgr.set_value(key, new_value, definition)
 
1241
  self.provider_settings_mgr.set_value(key, new_value, definition)
1242
  self.console.print(f"\n[green]✅ {short_key} set to {new_value}![/green]")
1243
  else:
1244
+ new_value = Prompt.ask(
1245
+ "\nNew value", default=str(current) if current else ""
1246
+ ).strip()
1247
  if new_value:
1248
  self.provider_settings_mgr.set_value(key, new_value, definition)
1249
  self.console.print(f"\n[green]✅ {short_key} updated![/green]")
1250
  else:
1251
  self.console.print("\n[yellow]No changes made[/yellow]")
1252
+
1253
  input("\nPress Enter to continue...")
1254
+
1255
+ def _reset_provider_setting(
1256
+ self,
1257
+ provider: str,
1258
+ settings_list: List[str],
1259
+ definitions: Dict[str, Dict[str, Any]],
1260
+ ):
1261
  """Reset a single provider setting to default"""
1262
  self.console.print("\n[bold]Select setting number to reset:[/bold]")
1263
+
1264
  choices = [str(i) for i in range(1, len(settings_list) + 1)]
1265
  choice = IntPrompt.ask("Setting number", choices=choices)
1266
  key = settings_list[choice - 1]
1267
  definition = definitions[key]
1268
+
1269
  default = definition.get("default")
1270
  short_key = key.replace(f"{provider.upper()}_", "")
1271
+
1272
  if Confirm.ask(f"\nReset {short_key} to default ({default})?"):
1273
  self.provider_settings_mgr.reset_to_default(key)
1274
  self.console.print(f"\n[green]✅ {short_key} reset to default![/green]")
1275
  else:
1276
  self.console.print("\n[yellow]No changes made[/yellow]")
1277
+
1278
  input("\nPress Enter to continue...")
1279
+
1280
  def _reset_all_provider_settings(self, provider: str, settings_list: List[str]):
1281
  """Reset all provider settings to defaults"""
1282
  display_name = provider.replace("_", " ").title()
1283
+
1284
+ if Confirm.ask(
1285
+ f"\n[bold red]Reset ALL {display_name} settings to defaults?[/bold red]"
1286
+ ):
1287
  for key in settings_list:
1288
  self.provider_settings_mgr.reset_to_default(key)
1289
+ self.console.print(
1290
+ f"\n[green]✅ All {display_name} settings reset to defaults![/green]"
1291
+ )
1292
  else:
1293
  self.console.print("\n[yellow]No changes made[/yellow]")
1294
+
1295
  input("\nPress Enter to continue...")
1296
+
+    def manage_rotation_modes(self):
+        """Manage credential rotation modes (sequential vs balanced)"""
+        while True:
+            clear_screen()
+
+            modes = self.rotation_mgr.get_current_modes()
+            available_providers = self.get_available_providers()
+
+            self.console.print(
+                Panel.fit(
+                    "[bold cyan]🔄 Credential Rotation Mode Configuration[/bold cyan]",
+                    border_style="cyan",
+                )
+            )
+
+            self.console.print()
+            self.console.print("[bold]📋 Rotation Modes Explained[/bold]")
+            self.console.print("━" * 70)
+            self.console.print(
+                "  [cyan]balanced[/cyan]   - Rotate credentials evenly across requests (default)"
+            )
+            self.console.print(
+                "  [cyan]sequential[/cyan] - Use one credential until exhausted (429), then switch"
+            )
+            self.console.print()
+            self.console.print("[bold]📋 Current Rotation Mode Settings[/bold]")
+            self.console.print("━" * 70)
+
+            if modes:
+                for provider, mode in modes.items():
+                    default_mode = self.rotation_mgr.get_default_mode(provider)
+                    is_custom = mode != default_mode
+                    marker = "[yellow]*[/yellow]" if is_custom else " "
+                    mode_display = (
+                        f"[green]{mode}[/green]"
+                        if mode == "sequential"
+                        else f"[blue]{mode}[/blue]"
+                    )
+                    self.console.print(f"  {marker}• {provider:20} {mode_display}")
+
+            # Show providers with default modes
+            providers_with_defaults = [p for p in available_providers if p not in modes]
+            if providers_with_defaults:
+                self.console.print()
+                self.console.print("[dim]Providers using default modes:[/dim]")
+                for provider in providers_with_defaults:
+                    default_mode = self.rotation_mgr.get_default_mode(provider)
+                    mode_display = (
+                        f"[green]{default_mode}[/green]"
+                        if default_mode == "sequential"
+                        else f"[blue]{default_mode}[/blue]"
+                    )
+                    self.console.print(
+                        f"  • {provider:20} {mode_display} [dim](default)[/dim]"
+                    )
+
+            self.console.print()
+            self.console.print("━" * 70)
+            self.console.print(
+                "[dim]* = custom setting (differs from provider default)[/dim]"
+            )
+            self.console.print()
+            self.console.print("[bold]⚙️ Actions[/bold]")
+            self.console.print()
+            self.console.print("  1. ➕ Set Rotation Mode for Provider")
+            self.console.print("  2. 🗑️ Reset to Provider Default")
+            self.console.print("  3. ⚡ Configure Priority Concurrency Multipliers")
+            self.console.print("  4. ↩️ Back to Settings Menu")
+
+            self.console.print()
+            self.console.print("━" * 70)
+            self.console.print()
+
+            choice = Prompt.ask(
+                "Select option", choices=["1", "2", "3", "4"], show_choices=False
+            )
+
+            if choice == "1":
+                if not available_providers:
+                    self.console.print(
+                        "\n[yellow]No providers with credentials found. Please add credentials first.[/yellow]"
+                    )
+                    input("\nPress Enter to continue...")
+                    continue
+
+                # Show provider selection menu
+                self.console.print("\n[bold]Select provider:[/bold]")
+                for idx, prov in enumerate(available_providers, 1):
+                    current_mode = self.rotation_mgr.get_effective_mode(prov)
+                    mode_display = (
+                        f"[green]{current_mode}[/green]"
+                        if current_mode == "sequential"
+                        else f"[blue]{current_mode}[/blue]"
+                    )
+                    self.console.print(f"  {idx}. {prov} ({mode_display})")
+                self.console.print(
+                    f"  {len(available_providers) + 1}. Enter custom provider name"
+                )
+
+                choice_idx = IntPrompt.ask(
+                    "Select option",
+                    choices=[str(i) for i in range(1, len(available_providers) + 2)],
+                )
+
+                if choice_idx == len(available_providers) + 1:
+                    provider = Prompt.ask("Provider name").strip().lower()
+                else:
+                    provider = available_providers[choice_idx - 1]
+
+                if provider:
+                    current_mode = self.rotation_mgr.get_effective_mode(provider)
+                    self.console.print(
+                        f"\nCurrent mode for {provider}: [cyan]{current_mode}[/cyan]"
+                    )
+                    self.console.print("\nSelect new rotation mode:")
+                    self.console.print(
+                        "  1. [blue]balanced[/blue]   - Rotate credentials evenly"
+                    )
+                    self.console.print(
+                        "  2. [green]sequential[/green] - Use until exhausted"
+                    )
+
+                    mode_choice = Prompt.ask(
+                        "Select mode", choices=["1", "2"], show_choices=False
+                    )
+                    new_mode = "balanced" if mode_choice == "1" else "sequential"
+
+                    self.rotation_mgr.set_mode(provider, new_mode)
+                    self.console.print(
+                        f"\n[green]✅ Rotation mode for '{provider}' set to {new_mode}![/green]"
+                    )
+                    input("\nPress Enter to continue...")
+
+            elif choice == "2":
+                if not modes:
+                    self.console.print(
+                        "\n[yellow]No custom rotation modes to reset[/yellow]"
+                    )
+                    input("\nPress Enter to continue...")
+                    continue
+
+                # Show numbered list
+                self.console.print(
+                    "\n[bold]Select provider to reset to default:[/bold]"
+                )
+                modes_list = list(modes.keys())
+                for idx, prov in enumerate(modes_list, 1):
+                    default_mode = self.rotation_mgr.get_default_mode(prov)
+                    self.console.print(
+                        f"  {idx}. {prov} (will reset to: {default_mode})"
+                    )
+
+                choice_idx = IntPrompt.ask(
+                    "Select option",
+                    choices=[str(i) for i in range(1, len(modes_list) + 1)],
+                )
+                provider = modes_list[choice_idx - 1]
+                default_mode = self.rotation_mgr.get_default_mode(provider)
+
+                if Confirm.ask(f"Reset '{provider}' to default mode ({default_mode})?"):
+                    self.rotation_mgr.remove_mode(provider)
+                    self.console.print(
+                        f"\n[green]✅ Rotation mode for '{provider}' reset to default ({default_mode})![/green]"
+                    )
+                input("\nPress Enter to continue...")
+
+            elif choice == "3":
+                self.manage_priority_multipliers()
+
+            elif choice == "4":
+                break
+
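For context on what the two modes mean when a credential is actually being picked, here is a minimal standalone sketch. The helper name and signature are hypothetical (the real selection lives in the library's usage manager, not in this menu code); it only illustrates the balanced-vs-sequential behavior described above.

```python
def pick_credential(creds, mode, on_cooldown, counter):
    """Pick the next usable credential under the given rotation mode.

    creds: ordered list of credential ids
    mode: "sequential" or "balanced"
    on_cooldown: set of credential ids currently unavailable (e.g. after a 429)
    counter: monotonically increasing request counter (used in balanced mode)
    """
    usable = [c for c in creds if c not in on_cooldown]
    if not usable:
        return None  # everything is on cooldown
    if mode == "sequential":
        # Stick with the first usable credential until it hits quota
        return usable[0]
    # balanced: spread requests evenly across the usable credentials
    return usable[counter % len(usable)]
```

In sequential mode the same credential is returned call after call until it lands in `on_cooldown`; in balanced mode successive counters walk through the pool.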
+    def manage_priority_multipliers(self):
+        """Manage priority-based concurrency multipliers per provider"""
+        clear_screen()
+
+        current_multipliers = self.priority_multiplier_mgr.get_current_multipliers()
+        available_providers = self.get_available_providers()
+
+        self.console.print(
+            Panel.fit(
+                "[bold cyan]⚡ Priority Concurrency Multipliers[/bold cyan]",
+                border_style="cyan",
+            )
+        )
+
+        self.console.print()
+        self.console.print("[bold]📋 Current Priority Multiplier Settings[/bold]")
+        self.console.print("━" * 70)
+
+        # Show all providers with their priority multipliers
+        has_settings = False
+        for provider in available_providers:
+            defaults = self.priority_multiplier_mgr.get_provider_defaults(provider)
+            overrides = current_multipliers.get(provider, {})
+            seq_fallback = self.priority_multiplier_mgr.get_sequential_fallback(
+                provider
+            )
+            rotation_mode = self.rotation_mgr.get_effective_mode(provider)
+
+            if defaults or overrides or seq_fallback != 1:
+                has_settings = True
+                self.console.print(
+                    f"\n  [bold]{provider}[/bold] ({rotation_mode} mode)"
+                )
+
+                # Combine and display priorities
+                all_priorities = set(defaults.keys()) | set(overrides.keys())
+                for priority in sorted(all_priorities):
+                    default_val = defaults.get(priority, 1)
+                    override_val = overrides.get(priority)
+
+                    if override_val is not None:
+                        self.console.print(
+                            f"    Priority {priority}: [cyan]{override_val}x[/cyan] (override, default: {default_val}x)"
+                        )
+                    else:
+                        self.console.print(
+                            f"    Priority {priority}: {default_val}x [dim](default)[/dim]"
+                        )
+
+                # Show sequential fallback if applicable
+                if rotation_mode == "sequential" and seq_fallback != 1:
+                    self.console.print(
+                        f"    Others (seq): {seq_fallback}x [dim](fallback)[/dim]"
+                    )
+
+        if not has_settings:
+            self.console.print("  [dim]No priority multipliers configured[/dim]")
+
+        self.console.print()
+        self.console.print("[bold]ℹ️ About Priority Multipliers:[/bold]")
+        self.console.print(
+            "  Higher priority tiers (lower numbers) can have higher multipliers."
+        )
+        self.console.print("  Example: Priority 1 = 5x, Priority 2 = 3x, Others = 1x")
+        self.console.print()
+        self.console.print("━" * 70)
+        self.console.print()
+        self.console.print("  1. ✏️ Set Priority Multiplier")
+        self.console.print("  2. 🔄 Reset to Provider Default")
+        self.console.print("  3. ↩️ Back")
+
+        choice = Prompt.ask(
+            "Select option", choices=["1", "2", "3"], show_choices=False
+        )
+
+        if choice == "1":
+            if not available_providers:
+                self.console.print("\n[yellow]No providers available[/yellow]")
+                input("\nPress Enter to continue...")
+                return
+
+            # Select provider
+            self.console.print("\n[bold]Select provider:[/bold]")
+            for idx, prov in enumerate(available_providers, 1):
+                self.console.print(f"  {idx}. {prov}")
+
+            prov_idx = IntPrompt.ask(
+                "Provider",
+                choices=[str(i) for i in range(1, len(available_providers) + 1)],
+            )
+            provider = available_providers[prov_idx - 1]
+
+            # Get priority level
+            priority = IntPrompt.ask("Priority level (e.g., 1, 2, 3)")
+
+            # Get current value
+            current = self.priority_multiplier_mgr.get_effective_multiplier(
+                provider, priority
+            )
+            self.console.print(
+                f"\nCurrent multiplier for priority {priority}: {current}x"
+            )
+
+            multiplier = IntPrompt.ask("New multiplier (1-10)", default=current)
+            if 1 <= multiplier <= 10:
+                self.priority_multiplier_mgr.set_multiplier(
+                    provider, priority, multiplier
+                )
+                self.console.print(
+                    f"\n[green]✅ Priority {priority} multiplier for '{provider}' set to {multiplier}x[/green]"
+                )
+            else:
+                self.console.print(
+                    "\n[yellow]Multiplier must be between 1 and 10[/yellow]"
+                )
+            input("\nPress Enter to continue...")
+
+        elif choice == "2":
+            # Find providers with overrides
+            providers_with_overrides = [
+                p for p in available_providers if p in current_multipliers
+            ]
+            if not providers_with_overrides:
+                self.console.print("\n[yellow]No custom multipliers to reset[/yellow]")
+                input("\nPress Enter to continue...")
+                return
+
+            self.console.print("\n[bold]Select provider to reset:[/bold]")
+            for idx, prov in enumerate(providers_with_overrides, 1):
+                self.console.print(f"  {idx}. {prov}")
+
+            prov_idx = IntPrompt.ask(
+                "Provider",
+                choices=[str(i) for i in range(1, len(providers_with_overrides) + 1)],
+            )
+            provider = providers_with_overrides[prov_idx - 1]
+
+            # Get priority to reset
+            overrides = current_multipliers.get(provider, {})
+            if len(overrides) == 1:
+                priority = list(overrides.keys())[0]
+            else:
+                self.console.print(f"\nOverrides for {provider}: {overrides}")
+                priority = IntPrompt.ask("Priority level to reset")
+
+            if priority in overrides:
+                self.priority_multiplier_mgr.remove_multiplier(provider, priority)
+                default = self.priority_multiplier_mgr.get_effective_multiplier(
+                    provider, priority
+                )
+                self.console.print(
+                    f"\n[green]✅ Reset priority {priority} for '{provider}' to default ({default}x)[/green]"
+                )
+            else:
+                self.console.print(
+                    f"\n[yellow]No override for priority {priority}[/yellow]"
+                )
+            input("\nPress Enter to continue...")
+
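The menu above implies a lookup order for the effective multiplier: explicit per-priority override, then provider-class default, then the sequential-mode fallback, then 1. A trimmed standalone sketch of that resolution (hypothetical free function; the real logic sits inside the priority-multiplier manager):

```python
def effective_multiplier(priority, overrides, defaults, rotation_mode, seq_fallback=1):
    """Resolve the concurrency multiplier for one credential priority tier."""
    if priority in overrides:
        # Explicit user override (e.g. set via this menu) wins
        return overrides[priority]
    if priority in defaults:
        # Provider-class default for this tier
        return defaults[priority]
    if rotation_mode == "sequential":
        # Sequential mode can apply a fallback to tiers with no entry
        return seq_fallback
    return 1  # global default
```

This matches the display logic above, where an override is shown as `{override}x (override, default: {default}x)` and unlisted tiers in sequential mode fall back to the "Others (seq)" value.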
     def manage_concurrency_limits(self):
         """Manage concurrency limits"""
         while True:
             clear_screen()
+
             limits = self.concurrency_mgr.get_current_limits()
+
+            self.console.print(
+                Panel.fit(
+                    "[bold cyan]⚡ Concurrency Limits Configuration[/bold cyan]",
+                    border_style="cyan",
+                )
+            )
+
             self.console.print()
             self.console.print("[bold]📋 Current Concurrency Settings[/bold]")
             self.console.print("━" * 70)
+
             if limits:
                 for provider, limit in limits.items():
                     self.console.print(f"  • {provider:15} {limit} requests/key")
                 self.console.print(f"  • Default: 1 request/key (all others)")
             else:
                 self.console.print("  • Default: 1 request/key (all providers)")
+
             self.console.print()
             self.console.print("━" * 70)
             self.console.print()

             self.console.print("  2. ✏️ Edit Existing Limit")
             self.console.print("  3. 🗑️ Remove Limit (reset to default)")
             self.console.print("  4. ↩️ Back to Settings Menu")
+
             self.console.print()
             self.console.print("━" * 70)
             self.console.print()
+
+            choice = Prompt.ask(
+                "Select option", choices=["1", "2", "3", "4"], show_choices=False
+            )
+
             if choice == "1":
                 # Get available providers
                 available_providers = self.get_available_providers()
+
                 if not available_providers:
+                    self.console.print(
+                        "\n[yellow]No providers with credentials found. Please add credentials first.[/yellow]"
+                    )
                     input("\nPress Enter to continue...")
                     continue
+
                 # Show provider selection menu
                 self.console.print("\n[bold]Select provider:[/bold]")
                 for idx, prov in enumerate(available_providers, 1):
                     self.console.print(f"  {idx}. {prov}")
+                self.console.print(
+                    f"  {len(available_providers) + 1}. Enter custom provider name"
+                )
+
+                choice_idx = IntPrompt.ask(
+                    "Select option",
+                    choices=[str(i) for i in range(1, len(available_providers) + 2)],
+                )
+
                 if choice_idx == len(available_providers) + 1:
                     provider = Prompt.ask("Provider name").strip().lower()
                 else:
                     provider = available_providers[choice_idx - 1]
+
                 if provider:
+                    limit = IntPrompt.ask(
+                        "Max concurrent requests per key (1-100)", default=1
+                    )
                     if 1 <= limit <= 100:
                         self.concurrency_mgr.set_limit(provider, limit)
+                        self.console.print(
+                            f"\n[green]✅ Concurrency limit set for '{provider}': {limit} requests/key[/green]"
+                        )
                     else:
+                        self.console.print(
+                            "\n[red]❌ Limit must be between 1-100[/red]"
+                        )
                     input("\nPress Enter to continue...")
+
             elif choice == "2":
                 if not limits:
                     self.console.print("\n[yellow]No limits to edit[/yellow]")
                     input("\nPress Enter to continue...")
                     continue
+
                 # Show numbered list
                 self.console.print("\n[bold]Select provider to edit:[/bold]")
                 limits_list = list(limits.keys())
                 for idx, prov in enumerate(limits_list, 1):
                     self.console.print(f"  {idx}. {prov}")
+
+                choice_idx = IntPrompt.ask(
+                    "Select option",
+                    choices=[str(i) for i in range(1, len(limits_list) + 1)],
+                )
                 provider = limits_list[choice_idx - 1]
                 current_limit = limits.get(provider, 1)
+
                 self.console.print(f"\nCurrent limit: {current_limit} requests/key")
+                new_limit = IntPrompt.ask(
+                    "New limit (1-100) [press Enter to keep current]",
+                    default=current_limit,
+                )
+
                 if 1 <= new_limit <= 100:
                     if new_limit != current_limit:
                         self.concurrency_mgr.set_limit(provider, new_limit)
+                        self.console.print(
+                            f"\n[green]✅ Concurrency limit updated for '{provider}': {new_limit} requests/key[/green]"
+                        )
                     else:
                         self.console.print("\n[yellow]No changes made[/yellow]")
                 else:
                     self.console.print("\n[red]Limit must be between 1-100[/red]")
                 input("\nPress Enter to continue...")
+
             elif choice == "3":
                 if not limits:
                     self.console.print("\n[yellow]No limits to remove[/yellow]")
                     input("\nPress Enter to continue...")
                     continue
+
                 # Show numbered list
+                self.console.print(
+                    "\n[bold]Select provider to remove limit from:[/bold]"
+                )
                 limits_list = list(limits.keys())
                 for idx, prov in enumerate(limits_list, 1):
                     self.console.print(f"  {idx}. {prov}")
+
+                choice_idx = IntPrompt.ask(
+                    "Select option",
+                    choices=[str(i) for i in range(1, len(limits_list) + 1)],
+                )
                 provider = limits_list[choice_idx - 1]
+
+                if Confirm.ask(
+                    f"Remove concurrency limit for '{provider}' (reset to default 1)?"
+                ):
                     self.concurrency_mgr.remove_limit(provider)
+                    self.console.print(
+                        f"\n[green]✅ Limit removed for '{provider}' - using default (1 request/key)[/green]"
+                    )
                 input("\nPress Enter to continue...")
+
             elif choice == "4":
                 break
+
     def save_and_exit(self):
         """Save pending changes and exit"""
         if self.settings.has_pending():

         else:
             self.console.print("\n[dim]No changes to save[/dim]")
             input("\nPress Enter to return to launcher...")
+
         self.running = False
+
     def exit_without_saving(self):
         """Exit without saving"""
         if self.settings.has_pending():
src/rotator_library/client.py CHANGED

@@ -139,12 +139,119 @@ class RotatingClient:
         self.max_retries = max_retries
         self.global_timeout = global_timeout
         self.abort_on_callback_error = abort_on_callback_error
         self.usage_manager = UsageManager(
-            file_path=usage_file_path, rotation_tolerance=rotation_tolerance
         )
         self._model_list_cache = {}
-        self._provider_plugins = PROVIDER_PLUGINS
-        self._provider_instances = {}
         self.http_client = httpx.AsyncClient()
         self.all_providers = AllProviders()
         self.cooldown_manager = CooldownManager()
@@ -958,19 +1065,185 @@ class RotatingClient:
                             is_budget_enabled
                         )

-                    # The plugin handles the entire call, including retries on 401, etc.
-                    # The main retry loop here is for key rotation on other errors.
-                    response = await provider_plugin.acompletion(
-                        self.http_client, **litellm_kwargs
-                    )
-
-                    # For non-streaming, success is immediate, and this function only handles non-streaming.
-                    await self.usage_manager.record_success(
-                        current_cred, model, response
-                    )
-                    await self.usage_manager.release_key(current_cred, model)
-                    key_acquired = False
-                    return response

                 else:  # This is the standard API Key / litellm-handled provider logic
                     is_oauth = provider in self.oauth_providers
@@ -1070,7 +1343,7 @@ class RotatingClient:
                             if request
                             else {},
                         )
-                        classified_error = classify_error(e)

                        # Extract a clean error message for the user-facing log
                        error_message = str(e).split("\n")[0]
@@ -1114,7 +1387,7 @@ class RotatingClient:
                            if request
                            else {},
                        )
-                        classified_error = classify_error(e)
                        error_message = str(e).split("\n")[0]

                        # Provider-level error: don't increment consecutive failures
@@ -1170,7 +1443,7 @@ class RotatingClient:
                            else {},
                        )

-                        classified_error = classify_error(e)
                        error_message = str(e).split("\n")[0]

                        lib_logger.warning(
@@ -1239,7 +1512,7 @@ class RotatingClient:
                            )
                            raise last_exception

-                        classified_error = classify_error(e)
                        error_message = str(e).split("\n")[0]

                        lib_logger.warning(
@@ -1566,7 +1839,9 @@ class RotatingClient:
                last_exception = e
                # If the exception is our custom wrapper, unwrap the original error
                original_exc = getattr(e, "data", e)
-                classified_error = classify_error(original_exc)
                error_message = str(original_exc).split("\n")[0]

                log_failure(
@@ -1623,7 +1898,7 @@ class RotatingClient:
                            if request
                            else {},
                        )
-                        classified_error = classify_error(e)
                        error_message = str(e).split("\n")[0]

                        # Provider-level error: don't increment consecutive failures
@@ -1673,7 +1948,7 @@ class RotatingClient:
                            if request
                            else {},
                        )
-                        classified_error = classify_error(e)
                        error_message = str(e).split("\n")[0]

                        # Record in accumulator
@@ -1812,7 +2087,9 @@ class RotatingClient:
                cleaned_str = None
                # The actual exception might be wrapped in our StreamedAPIError.
                original_exc = getattr(e, "data", e)
-                classified_error = classify_error(original_exc)

                # Check if this error should trigger rotation
                if not should_rotate_on_error(classified_error):
@@ -1939,7 +2216,7 @@ class RotatingClient:
                            if request
                            else {},
                        )
-                        classified_error = classify_error(e)
                        error_message_text = str(e).split("\n")[0]

                        # Record error in accumulator (server errors are transient, not abnormal)
@@ -1990,7 +2267,7 @@ class RotatingClient:
                            if request
                            else {},
                        )
-                        classified_error = classify_error(e)
                        error_message_text = str(e).split("\n")[0]

                        # Record error in accumulator
@@ -2232,7 +2509,7 @@ class RotatingClient:
                self._model_list_cache[provider] = final_models
                return final_models
            except Exception as e:
-                classified_error = classify_error(e)
                cred_display = mask_credential(credential)
                lib_logger.debug(
                    f"Failed to get models for provider {provider} with credential {cred_display}: {classified_error.error_type}. Trying next credential."
         self.max_retries = max_retries
         self.global_timeout = global_timeout
         self.abort_on_callback_error = abort_on_callback_error
+
+        # Initialize provider plugins early so they can be used for rotation mode detection
+        self._provider_plugins = PROVIDER_PLUGINS
+        self._provider_instances = {}
+
+        # Build provider rotation modes map
+        # Each provider can specify its preferred rotation mode ("balanced" or "sequential")
+        provider_rotation_modes = {}
+        for provider in self.all_credentials.keys():
+            provider_class = self._provider_plugins.get(provider)
+            if provider_class and hasattr(provider_class, "get_rotation_mode"):
+                # Use class method to get rotation mode (checks env var + class default)
+                mode = provider_class.get_rotation_mode(provider)
+            else:
+                # Fallback: check environment variable directly
+                env_key = f"ROTATION_MODE_{provider.upper()}"
+                mode = os.getenv(env_key, "balanced")
+
+            provider_rotation_modes[provider] = mode
+            if mode != "balanced":
+                lib_logger.info(f"Provider '{provider}' using rotation mode: {mode}")
+
+        # Build priority-based concurrency multiplier maps
+        # These are universal multipliers based on credential tier/priority
+        priority_multipliers: Dict[str, Dict[int, int]] = {}
+        priority_multipliers_by_mode: Dict[str, Dict[str, Dict[int, int]]] = {}
+        sequential_fallback_multipliers: Dict[str, int] = {}
+
+        for provider in self.all_credentials.keys():
+            provider_class = self._provider_plugins.get(provider)
+
+            # Start with provider class defaults
+            if provider_class:
+                # Get default priority multipliers from provider class
+                if hasattr(provider_class, "default_priority_multipliers"):
+                    default_multipliers = provider_class.default_priority_multipliers
+                    if default_multipliers:
+                        priority_multipliers[provider] = dict(default_multipliers)
+
+                # Get sequential fallback from provider class
+                if hasattr(provider_class, "default_sequential_fallback_multiplier"):
+                    fallback = provider_class.default_sequential_fallback_multiplier
+                    if fallback != 1:  # Only store if different from global default
+                        sequential_fallback_multipliers[provider] = fallback
+
+            # Override with environment variables
+            # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
+            # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
+            for key, value in os.environ.items():
+                prefix = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_"
+                if key.startswith(prefix):
+                    remainder = key[len(prefix) :]
+                    try:
+                        multiplier = int(value)
+                        if multiplier < 1:
+                            lib_logger.warning(f"Invalid {key}: {value}. Must be >= 1.")
+                            continue
+
+                        # Check if mode-specific (e.g., _PRIORITY_1_SEQUENTIAL)
+                        if "_" in remainder:
+                            parts = remainder.rsplit("_", 1)
+                            priority = int(parts[0])
+                            mode = parts[1].lower()
+                            if mode in ("sequential", "balanced"):
+                                # Mode-specific override
+                                if provider not in priority_multipliers_by_mode:
+                                    priority_multipliers_by_mode[provider] = {}
+                                if mode not in priority_multipliers_by_mode[provider]:
+                                    priority_multipliers_by_mode[provider][mode] = {}
+                                priority_multipliers_by_mode[provider][mode][
+                                    priority
+                                ] = multiplier
+                                lib_logger.info(
+                                    f"Provider '{provider}' priority {priority} ({mode} mode) multiplier: {multiplier}x"
+                                )
+                            else:
+                                # Assume it's part of the priority number (unlikely but handle gracefully)
+                                lib_logger.warning(f"Unknown mode in {key}: {mode}")
+                        else:
+                            # Universal priority multiplier
+                            priority = int(remainder)
+                            if provider not in priority_multipliers:
+                                priority_multipliers[provider] = {}
+                            priority_multipliers[provider][priority] = multiplier
+                            lib_logger.info(
+                                f"Provider '{provider}' priority {priority} multiplier: {multiplier}x"
+                            )
+                    except ValueError:
+                        lib_logger.warning(
+                            f"Invalid {key}: {value}. Could not parse priority or multiplier."
+                        )
+
+        # Log configured multipliers
+        for provider, multipliers in priority_multipliers.items():
+            if multipliers:
+                lib_logger.info(
+                    f"Provider '{provider}' priority multipliers: {multipliers}"
+                )
+        for provider, fallback in sequential_fallback_multipliers.items():
+            lib_logger.info(
+                f"Provider '{provider}' sequential fallback multiplier: {fallback}x"
+            )
+
         self.usage_manager = UsageManager(
+            file_path=usage_file_path,
+            rotation_tolerance=rotation_tolerance,
+            provider_rotation_modes=provider_rotation_modes,
+            provider_plugins=PROVIDER_PLUGINS,
+            priority_multipliers=priority_multipliers,
+            priority_multipliers_by_mode=priority_multipliers_by_mode,
+            sequential_fallback_multipliers=sequential_fallback_multipliers,
         )
         self._model_list_cache = {}

         self.http_client = httpx.AsyncClient()
         self.all_providers = AllProviders()
         self.cooldown_manager = CooldownManager()
 
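The env-var naming convention described in the comments above can be sketched as a standalone parser. `parse_multiplier_key` is a hypothetical helper written for illustration, not part of the library; it mirrors the `rsplit("_", 1)` logic of the diffed code:

```python
from typing import Optional, Tuple

def parse_multiplier_key(key: str, provider: str) -> Optional[Tuple[int, Optional[str]]]:
    """Parse CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>[_<MODE>] keys.

    Returns (priority, mode) where mode is None for a universal multiplier,
    or None if the key does not match the convention.
    """
    prefix = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_"
    if not key.startswith(prefix):
        return None
    remainder = key[len(prefix):]
    if "_" in remainder:
        priority_str, mode = remainder.rsplit("_", 1)
        mode = mode.lower()
        if mode in ("sequential", "balanced"):
            return int(priority_str), mode
        return None  # the library logs a warning here instead
    return int(remainder), None  # universal: applies to both modes
```

For example, `CONCURRENCY_MULTIPLIER_GEMINI_PRIORITY_1_SEQUENTIAL=4` targets priority 1 in sequential mode only, while `CONCURRENCY_MULTIPLIER_GEMINI_PRIORITY_2=3` applies to priority 2 in either mode.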
                     is_budget_enabled
                 )

+                # Retry loop for custom providers - mirrors streaming path error handling
+                for attempt in range(self.max_retries):
+                    try:
+                        lib_logger.info(
+                            f"Attempting call with credential {mask_credential(current_cred)} (Attempt {attempt + 1}/{self.max_retries})"
+                        )

+                        if pre_request_callback:
+                            try:
+                                await pre_request_callback(request, litellm_kwargs)
+                            except Exception as e:
+                                if self.abort_on_callback_error:
+                                    raise PreRequestCallbackError(
+                                        f"Pre-request callback failed: {e}"
+                                    ) from e
+                                else:
+                                    lib_logger.warning(
+                                        f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}"
+                                    )
+
+                        response = await provider_plugin.acompletion(
+                            self.http_client, **litellm_kwargs
+                        )
+
+                        # For non-streaming, success is immediate
+                        await self.usage_manager.record_success(
+                            current_cred, model, response
+                        )
+                        await self.usage_manager.release_key(current_cred, model)
+                        key_acquired = False
+                        return response
+
+                    except (
+                        litellm.RateLimitError,
+                        httpx.HTTPStatusError,
+                    ) as e:
+                        last_exception = e
+                        classified_error = classify_error(e, provider=provider)
+                        error_message = str(e).split("\n")[0]
+
+                        log_failure(
+                            api_key=current_cred,
+                            model=model,
+                            attempt=attempt + 1,
+                            error=e,
+                            request_headers=dict(request.headers)
+                            if request
+                            else {},
+                        )
+
+                        # Record in accumulator for client reporting
+                        error_accumulator.record_error(
+                            current_cred, classified_error, error_message
+                        )
+
+                        # Check if this error should trigger rotation
+                        if not should_rotate_on_error(classified_error):
+                            lib_logger.error(
+                                f"Non-recoverable error ({classified_error.error_type}) during custom provider call. Failing."
+                            )
+                            raise last_exception
+
+                        # Handle rate limits with cooldown (exclude quota_exceeded)
+                        if classified_error.error_type == "rate_limit":
+                            cooldown_duration = classified_error.retry_after or 60
+                            await self.cooldown_manager.start_cooldown(
+                                provider, cooldown_duration
+                            )
+
+                        await self.usage_manager.record_failure(
+                            current_cred, model, classified_error
+                        )
+                        lib_logger.warning(
+                            f"Cred {mask_credential(current_cred)} {classified_error.error_type} (HTTP {classified_error.status_code}). Rotating."
+                        )
+                        break  # Rotate to next credential
+
+                    except (
+                        APIConnectionError,
+                        litellm.InternalServerError,
+                        litellm.ServiceUnavailableError,
+                    ) as e:
+                        last_exception = e
+                        log_failure(
+                            api_key=current_cred,
+                            model=model,
+                            attempt=attempt + 1,
+                            error=e,
+                            request_headers=dict(request.headers)
+                            if request
+                            else {},
+                        )
+                        classified_error = classify_error(e, provider=provider)
+                        error_message = str(e).split("\n")[0]
+
+                        # Provider-level error: don't increment consecutive failures
+                        await self.usage_manager.record_failure(
+                            current_cred,
+                            model,
+                            classified_error,
+                            increment_consecutive_failures=False,
+                        )
+
+                        if attempt >= self.max_retries - 1:
+                            error_accumulator.record_error(
+                                current_cred, classified_error, error_message
+                            )
+                            lib_logger.warning(
+                                f"Cred {mask_credential(current_cred)} failed after max retries. Rotating."
+                            )
+                            break
+
+                        wait_time = classified_error.retry_after or (
+                            2**attempt
+                        ) + random.uniform(0, 1)
+                        remaining_budget = deadline - time.time()
+                        if wait_time > remaining_budget:
+                            error_accumulator.record_error(
+                                current_cred, classified_error, error_message
+                            )
+                            lib_logger.warning(
+                                f"Retry wait ({wait_time:.2f}s) exceeds budget. Rotating."
+                            )
+                            break
+
+                        lib_logger.warning(
+                            f"Cred {mask_credential(current_cred)} server error. Retrying in {wait_time:.2f}s."
+                        )
+                        await asyncio.sleep(wait_time)
+                        continue
+
+                    except Exception as e:
+                        last_exception = e
+                        log_failure(
+                            api_key=current_cred,
+                            model=model,
+                            attempt=attempt + 1,
+                            error=e,
+                            request_headers=dict(request.headers)
+                            if request
+                            else {},
+                        )
+                        classified_error = classify_error(e, provider=provider)
+                        error_message = str(e).split("\n")[0]
+
+                        # Record in accumulator
+                        error_accumulator.record_error(
+                            current_cred, classified_error, error_message
+                        )
+
+                        lib_logger.warning(
+                            f"Cred {mask_credential(current_cred)} {classified_error.error_type} (HTTP {classified_error.status_code})."
+                        )
+
+                        # Check if this error should trigger rotation
+                        if not should_rotate_on_error(classified_error):
+                            lib_logger.error(
+                                f"Non-recoverable error ({classified_error.error_type}). Failing."
+                            )
+                            raise last_exception
+
+                        # Handle rate limits with cooldown (exclude quota_exceeded)
+                        if (
+                            classified_error.status_code == 429
+                            and classified_error.error_type != "quota_exceeded"
+                        ) or classified_error.error_type == "rate_limit":
+                            cooldown_duration = classified_error.retry_after or 60
+                            await self.cooldown_manager.start_cooldown(
+                                provider, cooldown_duration
+                            )
+
+                        await self.usage_manager.record_failure(
+                            current_cred, model, classified_error
+                        )
+                        break  # Rotate to next credential
+
+                # If the inner loop breaks, it means the key failed and we need to rotate.
+                # Continue to the next iteration of the outer while loop to pick a new key.
+                continue

             else:  # This is the standard API Key / litellm-handled provider logic
                 is_oauth = provider in self.oauth_providers
 
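The backoff-versus-budget decision in the server-error branch above reduces to a small rule: honor the server's `retry_after` if given, otherwise use exponential backoff with jitter, and rotate instead of waiting when the wait would exceed the request's remaining deadline. This is an illustrative sketch of that rule (`next_wait` is a hypothetical helper, not a library function):

```python
import random
import time

def next_wait(attempt: int, retry_after, deadline: float):
    """Return seconds to sleep before the next retry, or None to rotate credentials.

    attempt: zero-based retry attempt number
    retry_after: server-suggested delay in seconds, or None
    deadline: absolute unix time by which the request must finish
    """
    # Server-suggested delay wins; otherwise 2**attempt plus up to 1s of jitter.
    wait = retry_after or (2 ** attempt) + random.uniform(0, 1)
    if wait > deadline - time.time():
        return None  # waiting would blow the time budget - rotate instead
    return wait
```

Note that, as in the diffed code, Python's low-precedence `or` means the jitter applies only to the backoff fallback, never on top of a server-supplied `retry_after`.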
                             if request
                             else {},
                         )
+                        classified_error = classify_error(e, provider=provider)

                         # Extract a clean error message for the user-facing log
                         error_message = str(e).split("\n")[0]

                             if request
                             else {},
                         )
+                        classified_error = classify_error(e, provider=provider)
                         error_message = str(e).split("\n")[0]

                         # Provider-level error: don't increment consecutive failures

                             else {},
                         )

+                        classified_error = classify_error(e, provider=provider)
                         error_message = str(e).split("\n")[0]

                         lib_logger.warning(

                         )
                         raise last_exception

+                        classified_error = classify_error(e, provider=provider)
                         error_message = str(e).split("\n")[0]

                         lib_logger.warning(

                         last_exception = e
                         # If the exception is our custom wrapper, unwrap the original error
                         original_exc = getattr(e, "data", e)
+                        classified_error = classify_error(
+                            original_exc, provider=provider
+                        )
                         error_message = str(original_exc).split("\n")[0]

                         log_failure(

                             if request
                             else {},
                         )
+                        classified_error = classify_error(e, provider=provider)
                         error_message = str(e).split("\n")[0]

                         # Provider-level error: don't increment consecutive failures

                             if request
                             else {},
                         )
+                        classified_error = classify_error(e, provider=provider)
                         error_message = str(e).split("\n")[0]

                         # Record in accumulator

                         cleaned_str = None
                         # The actual exception might be wrapped in our StreamedAPIError.
                         original_exc = getattr(e, "data", e)
+                        classified_error = classify_error(
+                            original_exc, provider=provider
+                        )

                         # Check if this error should trigger rotation
                         if not should_rotate_on_error(classified_error):

                             if request
                             else {},
                         )
+                        classified_error = classify_error(e, provider=provider)
                         error_message_text = str(e).split("\n")[0]

                         # Record error in accumulator (server errors are transient, not abnormal)

                             if request
                             else {},
                         )
+                        classified_error = classify_error(e, provider=provider)
                         error_message_text = str(e).split("\n")[0]

                         # Record error in accumulator

                 self._model_list_cache[provider] = final_models
                 return final_models
             except Exception as e:
+                classified_error = classify_error(e, provider=provider)
                 cred_display = mask_credential(credential)
                 lib_logger.debug(
                     f"Failed to get models for provider {provider} with credential {cred_display}: {classified_error.error_type}. Trying next credential."
src/rotator_library/error_handler.py CHANGED
@@ -1,6 +1,7 @@
 import re
 import json
 import os
+import logging
 from typing import Optional, Dict, Any
 import httpx

@@ -17,6 +18,8 @@ from litellm.exceptions import (
     ContextWindowExceededError,
 )

+lib_logger = logging.getLogger("rotator_library")
+

 def _parse_duration_string(duration_str: str) -> Optional[int]:
     """
@@ -344,14 +347,26 @@ class ClassifiedError:
         original_exception: Exception,
         status_code: Optional[int] = None,
         retry_after: Optional[int] = None,
+        quota_reset_timestamp: Optional[float] = None,
     ):
         self.error_type = error_type
         self.original_exception = original_exception
         self.status_code = status_code
         self.retry_after = retry_after
+        # Unix timestamp when quota resets (from quota_exhausted errors)
+        # This is the authoritative reset time parsed from provider's error response
+        self.quota_reset_timestamp = quota_reset_timestamp

     def __str__(self):
-        return f"ClassifiedError(type={self.error_type}, status={self.status_code}, retry_after={self.retry_after}, original_exc={self.original_exception})"
+        parts = [
+            f"type={self.error_type}",
+            f"status={self.status_code}",
+            f"retry_after={self.retry_after}",
+        ]
+        if self.quota_reset_timestamp:
+            parts.append(f"quota_reset_ts={self.quota_reset_timestamp}")
+        parts.append(f"original_exc={self.original_exception}")
+        return f"ClassifiedError({', '.join(parts)})"


 def _extract_retry_from_json_body(json_text: str) -> Optional[int]:
@@ -513,11 +528,15 @@ def get_retry_after(error: Exception) -> Optional[int]:
     return None


-def classify_error(e: Exception) -> ClassifiedError:
+def classify_error(e: Exception, provider: Optional[str] = None) -> ClassifiedError:
     """
     Classifies an exception into a structured ClassifiedError object.
     Now handles both litellm and httpx exceptions.

+    If provider is specified and has a parse_quota_error() method,
+    attempts provider-specific error parsing first before falling back
+    to generic classification.
+
     Error types and their typical handling:
     - rate_limit (429): Rotate key, may retry with backoff
     - server_error (5xx): Retry with backoff, then rotate
@@ -528,7 +547,62 @@ def classify_error(e: Exception) -> ClassifiedError:
     - context_window_exceeded: Don't retry - request too large
     - api_connection: Retry with backoff, then rotate
     - unknown: Rotate key (safer to try another)
+
+    Args:
+        e: The exception to classify
+        provider: Optional provider name for provider-specific error parsing
+
+    Returns:
+        ClassifiedError with error_type, status_code, retry_after, etc.
     """
+    # Try provider-specific parsing first for 429/rate limit errors
+    if provider:
+        try:
+            from .providers import PROVIDER_PLUGINS
+
+            provider_class = PROVIDER_PLUGINS.get(provider)
+
+            if provider_class and hasattr(provider_class, "parse_quota_error"):
+                # Get error body if available
+                error_body = None
+                if hasattr(e, "response") and hasattr(e.response, "text"):
+                    try:
+                        error_body = e.response.text
+                    except Exception:
+                        pass
+                elif hasattr(e, "body"):
+                    error_body = str(e.body)
+
+                quota_info = provider_class.parse_quota_error(e, error_body)
+
+                if quota_info and quota_info.get("retry_after"):
+                    retry_after = quota_info["retry_after"]
+                    reason = quota_info.get("reason", "QUOTA_EXHAUSTED")
+                    reset_ts = quota_info.get("reset_timestamp")
+                    quota_reset_timestamp = quota_info.get("quota_reset_timestamp")
+
+                    # Log the parsed result with human-readable duration
+                    hours = retry_after / 3600
+                    lib_logger.info(
+                        f"Provider '{provider}' parsed quota error: "
+                        f"retry_after={retry_after}s ({hours:.1f}h), reason={reason}"
+                        + (f", resets at {reset_ts}" if reset_ts else "")
+                    )
+
+                    return ClassifiedError(
+                        error_type="quota_exceeded",
+                        original_exception=e,
+                        status_code=429,
+                        retry_after=retry_after,
+                        quota_reset_timestamp=quota_reset_timestamp,
+                    )
+        except Exception as parse_error:
+            lib_logger.debug(
+                f"Provider-specific error parsing failed for '{provider}': {parse_error}"
+            )
+            # Fall through to generic classification
+
+    # Generic classification logic
     status_code = getattr(e, "status_code", None)

     if isinstance(e, httpx.HTTPStatusError):  # [NEW] Handle httpx errors first
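To make the new `__str__` output shape concrete, here is a minimal standalone re-creation of the diffed class, trimmed to just the fields shown in the hunk above (the real class lives in `error_handler.py`):

```python
class ClassifiedError:
    """Trimmed re-creation of the diffed class, for demonstrating __str__ output."""

    def __init__(self, error_type, original_exception, status_code=None,
                 retry_after=None, quota_reset_timestamp=None):
        self.error_type = error_type
        self.original_exception = original_exception
        self.status_code = status_code
        self.retry_after = retry_after
        self.quota_reset_timestamp = quota_reset_timestamp

    def __str__(self):
        # quota_reset_ts is only included when a reset timestamp was parsed
        parts = [
            f"type={self.error_type}",
            f"status={self.status_code}",
            f"retry_after={self.retry_after}",
        ]
        if self.quota_reset_timestamp:
            parts.append(f"quota_reset_ts={self.quota_reset_timestamp}")
        parts.append(f"original_exc={self.original_exception}")
        return f"ClassifiedError({', '.join(parts)})"
```

A plain rate-limit error renders without the `quota_reset_ts` field, keeping log lines for the common case unchanged from before the diff.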
src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -34,7 +34,7 @@ from urllib.parse import urlparse
 import httpx
 import litellm

-from .provider_interface import ProviderInterface
+from .provider_interface import ProviderInterface, UsageResetConfigDef, QuotaGroupMap
 from .antigravity_auth_base import AntigravityAuthBase
 from .provider_cache import ProviderCache
 from ..model_definitions import ModelDefinitions
@@ -50,7 +50,7 @@ lib_logger = logging.getLogger("rotator_library")
 # Priority: daily (sandbox) → autopush (sandbox) → production
 BASE_URLS = [
     "https://daily-cloudcode-pa.sandbox.googleapis.com/v1internal",
-    "https://autopush-cloudcode-pa.sandbox.googleapis.com/v1internal",
+    # "https://autopush-cloudcode-pa.sandbox.googleapis.com/v1internal",
     "https://cloudcode-pa.googleapis.com/v1internal",  # Production fallback
 ]

@@ -494,6 +494,227 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):

     skip_cost_calculation = True

+    # Sequential mode by default - preserves thinking signature caches between requests
+    default_rotation_mode: str = "sequential"
+
+    # =========================================================================
+    # TIER & USAGE CONFIGURATION
+    # =========================================================================
+
+    # Provider name for env var lookups (QUOTA_GROUPS_ANTIGRAVITY_*)
+    provider_env_name: str = "antigravity"
+
+    # Tier name -> priority mapping (Single Source of Truth)
+    # Lower numbers = higher priority
+    tier_priorities = {
+        # Priority 1: Highest paid tier (Google AI Ultra - name unconfirmed)
+        # "google-ai-ultra": 1,  # Uncomment when tier name is confirmed
+        # Priority 2: Standard paid tier
+        "standard-tier": 2,
+        # Priority 3: Free tier
+        "free-tier": 3,
+        # Priority 10: Legacy/Unknown (lowest)
+        "legacy-tier": 10,
+        "unknown": 10,
+    }
+
+    # Default priority for tiers not in the mapping
+    default_tier_priority: int = 10
+
+    # Usage reset configs keyed by priority sets
+    # Priorities 1-2 (paid tiers) get a 5h window, others get a 7d window
+    usage_reset_configs = {
+        frozenset({1, 2}): UsageResetConfigDef(
+            window_seconds=5 * 60 * 60,  # 5 hours
+            mode="per_model",
+            description="5-hour per-model window (paid tier)",
+            field_name="models",
+        ),
+        "default": UsageResetConfigDef(
+            window_seconds=7 * 24 * 60 * 60,  # 7 days
+            mode="per_model",
+            description="7-day per-model window (free/unknown tier)",
+            field_name="models",
+        ),
+    }
+
+    # Model quota groups (can be overridden via QUOTA_GROUPS_ANTIGRAVITY_CLAUDE)
+    # Models in the same group share quota - when one is exhausted, all are
+    model_quota_groups: QuotaGroupMap = {
+        "claude": ["claude-sonnet-4-5", "claude-opus-4-5"],
+    }
+
+    # Model usage weights for grouped usage calculation
+    # Opus consumes more quota per request, so its usage counts 2x when
+    # comparing credentials for selection
+    model_usage_weights = {
+        "claude-opus-4-5": 2,
+    }
+
+    # Priority-based concurrency multipliers
+    # Higher priority credentials (lower number) get higher multipliers
+    # Priority 1 (paid ultra): 5x concurrent requests
+    # Priority 2 (standard paid): 3x concurrent requests
+    # Others: Use sequential fallback (2x) or balanced default (1x)
+    default_priority_multipliers = {1: 5, 2: 3}
+
+    # For sequential mode, lower priority tiers still get 2x to maintain stickiness
+    # For balanced mode, this doesn't apply (falls back to 1x)
+    default_sequential_fallback_multiplier = 2
+
+    @staticmethod
+    def parse_quota_error(
+        error: Exception, error_body: Optional[str] = None
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Parse Antigravity/Google RPC quota errors.
+
+        Handles the Google Cloud API error format with ErrorInfo and RetryInfo details.
+
+        Example error format:
+            {
+                "error": {
+                    "code": 429,
+                    "details": [
+                        {
+                            "@type": "type.googleapis.com/google.rpc.ErrorInfo",
+                            "reason": "QUOTA_EXHAUSTED",
+                            "metadata": {
+                                "quotaResetDelay": "143h4m52.730699158s",
+                                "quotaResetTimeStamp": "2025-12-11T22:53:16Z"
+                            }
+                        },
+                        {
+                            "@type": "type.googleapis.com/google.rpc.RetryInfo",
+                            "retryDelay": "515092.730699158s"
+                        }
+                    ]
+                }
+            }
+
+        Args:
+            error: The caught exception
+            error_body: Optional raw response body string
+
+        Returns:
+            None if not a parseable quota error, otherwise:
+            {
+                "retry_after": int,
+                "reason": str,
+                "reset_timestamp": str | None,
+            }
+        """
+        import re as regex_module
+
+        def parse_duration(duration_str: str) -> Optional[int]:
+            """Parse duration strings like '143h4m52.73s' or '515092.73s' to seconds."""
+            if not duration_str:
+                return None
+
+            # Handle pure seconds format: "515092.730699158s"
+            pure_seconds_match = regex_module.match(r"^([\d.]+)s$", duration_str)
+            if pure_seconds_match:
+                return int(float(pure_seconds_match.group(1)))
+
+            # Handle compound format: "143h4m52.730699158s"
+            total_seconds = 0
+            patterns = [
+                (r"(\d+)h", 3600),  # hours
+                (r"(\d+)m", 60),  # minutes
+                (r"([\d.]+)s", 1),  # seconds
+            ]
+            for pattern, multiplier in patterns:
+                match = regex_module.search(pattern, duration_str)
+                if match:
+                    total_seconds += float(match.group(1)) * multiplier
+
+            return int(total_seconds) if total_seconds > 0 else None
+
+        # Get error body from exception if not provided
+        body = error_body
+        if not body:
+            # Try to extract from various exception attributes
+            if hasattr(error, "response") and hasattr(error.response, "text"):
+                body = error.response.text
+            elif hasattr(error, "body"):
+                body = str(error.body)
+            elif hasattr(error, "message"):
+                body = str(error.message)
+            else:
+                body = str(error)
+
+        # Try to find JSON in the body
+        try:
+            # Handle cases where JSON is embedded in a larger string
+            json_match = regex_module.search(r"\{[\s\S]*\}", body)
+            if not json_match:
+                return None
+
+            data = json.loads(json_match.group(0))
+        except (json.JSONDecodeError, AttributeError, TypeError):
+            return None
+
+        # Navigate to error.details
+        error_obj = data.get("error", data)
+        details = error_obj.get("details", [])
+
+        if not details:
+            return None
+
+        result = {
+            "retry_after": None,
+            "reason": None,
+            "reset_timestamp": None,
+            "quota_reset_timestamp": None,  # Unix timestamp for quota reset
+        }
+
+        for detail in details:
+            detail_type = detail.get("@type", "")
+
+            # Parse RetryInfo - most authoritative source for retry delay
+            if "RetryInfo" in detail_type:
+                retry_delay = detail.get("retryDelay")
+                if retry_delay:
+                    parsed = parse_duration(retry_delay)
+                    if parsed:
+                        result["retry_after"] = parsed
+
+            # Parse ErrorInfo - contains reason and quota reset metadata
+            elif "ErrorInfo" in detail_type:
+                result["reason"] = detail.get("reason")
+                metadata = detail.get("metadata", {})
+
+                # Get quotaResetDelay as fallback if RetryInfo not present
+                if not result["retry_after"]:
+                    quota_delay = metadata.get("quotaResetDelay")
+                    if quota_delay:
+                        parsed = parse_duration(quota_delay)
+                        if parsed:
+                            result["retry_after"] = parsed
+
+                # Capture reset timestamp for logging and authoritative reset time
+                reset_ts_str = metadata.get("quotaResetTimeStamp")
+                result["reset_timestamp"] = reset_ts_str
+
+                # Parse ISO timestamp to Unix timestamp for usage tracking
+                if reset_ts_str:
+                    try:
+                        # Handle ISO format: "2025-12-11T22:53:16Z"
+                        reset_dt = datetime.fromisoformat(
+                            reset_ts_str.replace("Z", "+00:00")
+                        )
+                        result["quota_reset_timestamp"] = reset_dt.timestamp()
+                    except (ValueError, AttributeError) as e:
+                        lib_logger.warning(
+                            f"Failed to parse quota reset timestamp '{reset_ts_str}': {e}"
+                        )
+
+        # Return None if we couldn't extract retry_after
+        if not result["retry_after"]:
+            return None
+
+        return result
+
     def __init__(self):
         super().__init__()
         self.model_definitions = ModelDefinitions()
@@ -577,43 +798,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
             f"claude_fix={self._enable_claude_tool_fix}, thinking_sanitization={self._enable_thinking_sanitization}"
         )

-    # =========================================================================
-    # CREDENTIAL PRIORITIZATION
-    # =========================================================================
-
-    def get_credential_priority(self, credential: str) -> Optional[int]:
-        """
-        Returns priority based on Antigravity tier.
-        Paid tiers: priority 1 (highest)
-        Free tier: priority 2
-        Legacy/Unknown: priority 10 (lowest)
-
-        Args:
-            credential: The credential path
-
-        Returns:
-            Priority level (1-10) or None if tier not yet discovered
-        """
-        tier = self.project_tier_cache.get(credential)
-
-        # Lazy load from file if not in cache
-        if not tier:
-            tier = self._load_tier_from_file(credential)
-
-        if not tier:
-            return None  # Not yet discovered
-
-        # Paid tiers get highest priority
-        if tier not in ["free-tier", "legacy-tier", "unknown"]:
-            return 1
-
-        # Free tier gets lower priority
-        if tier == "free-tier":
-            return 2
-
-        # Legacy and unknown get even lower
-        return 10
-
     def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
         """
         Load tier from credential file's _proxy_metadata and cache it.
@@ -2375,9 +2559,9 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
                             f"Ignoring duplicate - this may indicate malformed conversation history."
                         )
                         continue
-                    lib_logger.debug(
-                        f"[Grouping] Collected response for ID: {resp_id}"
-                    )
+                    #lib_logger.debug(
+                    #    f"[Grouping] Collected response for ID: {resp_id}"
+                    #)
                     collected_responses[resp_id] = resp

                 # Try to satisfy pending groups (newest first)
@@ -2392,10 +2576,10 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
                         collected_responses.pop(gid) for gid in group_ids
                     ]
                     new_contents.append({"parts": group_responses, "role": "user"})
-                    lib_logger.debug(
-                        f"[Grouping] Satisfied group with {len(group_responses)} responses: "
-                        f"ids={group_ids}"
-                    )
+                    #lib_logger.debug(
+                    #    f"[Grouping] Satisfied group with {len(group_responses)} responses: "
+                    #    f"ids={group_ids}"
+                    #)
                     pending_groups.pop(i)
                     break
                 continue
@@ -2415,10 +2599,10 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
                 ]

                 if call_ids:
-                    lib_logger.debug(
-                        f"[Grouping] Created pending group expecting {len(call_ids)} responses: "
-                        f"ids={call_ids}, names={func_names}"
-                    )
+                    #lib_logger.debug(
+                    #    f"[Grouping] Created pending group expecting {len(call_ids)} responses: "
+                    #    f"ids={call_ids}, names={func_names}"
+                    #)
                     pending_groups.append(
                         {
                             "ids": call_ids,
@@ -3450,7 +3634,28 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
             return await self._handle_non_streaming(
                 client, url, headers, payload, model, file_logger
            )
+        except httpx.HTTPStatusError as e:
+            # 429 = Rate limit/quota exhausted - tied to credential, not URL
+            # Do NOT retry on different URL, just raise immediately
+            if e.response.status_code == 429:
+                lib_logger.debug(f"429 quota error - not retrying on fallback URL: {e}")
+                raise
+
+            # For other HTTP errors (403, 500, etc.), try fallback URL
+            if self._try_next_base_url():
+                lib_logger.warning(f"Retrying with fallback URL: {e}")
+                url = f"{self._get_base_url()}{endpoint}"
+                if stream:
+                    return self._handle_streaming(
+                        client, url, headers, payload, model, file_logger
+                    )
+                else:
+                    return await self._handle_non_streaming(
+                        client, url, headers, payload, model, file_logger
+                    )
+            raise
         except Exception as e:
+            # Non-HTTP errors (network issues, timeouts, etc.) - try fallback URL
             if self._try_next_base_url():
                 lib_logger.warning(f"Retrying with fallback URL: {e}")
                 url = f"{self._get_base_url()}{endpoint}"
@@ -3534,11 +3739,13 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
                 "POST", url, headers=headers, json=payload, timeout=600.0
             ) as response:
                 if response.status_code >= 400:
+                    # Read error body for raise_for_status to include in exception
+                    # Terminal logging commented out - errors are logged in failures.log
                     try:
-                        error_body = await response.aread()
-                        lib_logger.error(
-                            f"API error {response.status_code}: {error_body.decode()}"
-                        )
+                        await response.aread()
+                        # lib_logger.error(
+                        #     f"API error {response.status_code}: {error_body.decode()}"
+                        # )
                     except Exception:
                         pass
 
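The hunk above raises immediately on 429 (the quota follows the credential, so another base URL cannot help) and retries every other failure on a fallback URL. A minimal stdlib-only sketch of that branching; the function name is hypothetical and `None` stands in for non-HTTP failures:

```python
from typing import Optional

def should_retry_on_fallback_url(status_code: Optional[int]) -> bool:
    # None models a non-HTTP failure (timeout, connection error):
    # another base URL may genuinely work, so retry.
    if status_code is None:
        return True
    # 429 quota/rate-limit errors follow the credential, not the endpoint,
    # so switching base URLs cannot help - raise immediately instead.
    if status_code == 429:
        return False
    # Other HTTP errors (403, 500, ...) may be URL-specific: worth one retry.
    return True
```

This mirrors the decision order of the `except httpx.HTTPStatusError` / `except Exception` pair above without the streaming plumbing.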
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -186,6 +186,71 @@ def _env_int(key: str, default: int) -> int:
  class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
  skip_cost_calculation = True

  def __init__(self):
  super().__init__()
  self.model_definitions = ModelDefinitions()
@@ -239,41 +304,13 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
  )

  # =========================================================================
- # CREDENTIAL PRIORITIZATION
  # =========================================================================
-
- def get_credential_priority(self, credential: str) -> Optional[int]:
- """
- Returns priority based on Gemini tier.
- Paid tiers: priority 1 (highest)
- Free/Legacy tiers: priority 2
- Unknown: priority 10 (lowest)
-
- Args:
- credential: The credential path
-
- Returns:
- Priority level (1-10) or None if tier not yet discovered
- """
- tier = self.project_tier_cache.get(credential)
-
- # Lazy load from file if not in cache
- if not tier:
- tier = self._load_tier_from_file(credential)
-
- if not tier:
- return None # Not yet discovered
-
- # Paid tiers get highest priority
- if tier not in ["free-tier", "legacy-tier", "unknown"]:
- return 1
-
- # Free tier gets lower priority
- if tier == "free-tier":
- return 2
-
- # Legacy and unknown get even lower
- return 10

  def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
  """

  class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
  skip_cost_calculation = True

+ # Balanced by default - Gemini CLI has short cooldowns (seconds, not hours)
+ default_rotation_mode: str = "balanced"
+
+ # =========================================================================
+ # TIER CONFIGURATION
+ # =========================================================================
+
+ # Provider name for env var lookups (QUOTA_GROUPS_GEMINI_CLI_*)
+ provider_env_name: str = "gemini_cli"
+
+ # Tier name -> priority mapping (Single Source of Truth)
+ # Same tier names as Antigravity (coincidentally), but defined separately
+ tier_priorities = {
+ # Priority 1: Highest paid tier (Google AI Ultra - name unconfirmed)
+ # "google-ai-ultra": 1, # Uncomment when tier name is confirmed
+ # Priority 2: Standard paid tier
+ "standard-tier": 2,
+ # Priority 3: Free tier
+ "free-tier": 3,
+ # Priority 10: Legacy/Unknown (lowest)
+ "legacy-tier": 10,
+ "unknown": 10,
+ }
+
+ # Default priority for tiers not in the mapping
+ default_tier_priority: int = 10
+
+ # Gemini CLI uses default daily reset - no custom usage_reset_configs
+ # (Empty dict means inherited get_usage_reset_config returns None)
+
+ # No quota groups defined for Gemini CLI
+ # (Models don't share quotas)
+
+ # Priority-based concurrency multipliers
+ # Same structure as Antigravity (by coincidence, tiers share naming)
+ # Priority 1 (paid ultra): 5x concurrent requests
+ # Priority 2 (standard paid): 3x concurrent requests
+ # Others: 1x (no sequential fallback, uses global default)
+ default_priority_multipliers = {1: 5, 2: 3}
+
+ # No sequential fallback for Gemini CLI (uses balanced mode default)
+ # default_sequential_fallback_multiplier = 1 (inherited from ProviderInterface)
+
+ @staticmethod
+ def parse_quota_error(
+ error: Exception, error_body: Optional[str] = None
+ ) -> Optional[Dict[str, Any]]:
+ """
+ Parse Gemini CLI quota errors.
+
+ Uses the same Google RPC format as Antigravity but typically has
+ much shorter cooldown durations (seconds to minutes, not hours).
+
+ Args:
+ error: The caught exception
+ error_body: Optional raw response body string
+
+ Returns:
+ Same format as AntigravityProvider.parse_quota_error()
+ """
+ # Reuse the same parsing logic as Antigravity since both use Google RPC format
+ from .antigravity_provider import AntigravityProvider
+
+ return AntigravityProvider.parse_quota_error(error, error_body)
+
  def __init__(self):
  super().__init__()
  self.model_definitions = ModelDefinitions()

  )

  # =========================================================================
+ # CREDENTIAL TIER LOOKUP (Provider-specific - uses cache)
+ # =========================================================================
+ #
+ # NOTE: get_credential_priority() is now inherited from ProviderInterface.
+ # It uses get_credential_tier_name() to get the tier and resolve priority
+ # from the tier_priorities class attribute.
  # =========================================================================

  def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
  """
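The provider above no longer implements its own `get_credential_priority()`; it declares `tier_priorities` and inherits the resolution logic. A condensed, self-contained sketch of that inherited tier-to-priority resolution (`MiniProvider` is a stand-in for the real classes in `provider_interface.py`):

```python
from typing import Dict, Optional

class MiniProvider:
    # Mirrors the Gemini CLI mapping declared in the diff above.
    tier_priorities: Dict[str, int] = {
        "standard-tier": 2,
        "free-tier": 3,
        "legacy-tier": 10,
        "unknown": 10,
    }
    default_tier_priority: int = 10

    def _resolve_tier_priority(self, tier_name: Optional[str]) -> int:
        # Unknown or undiscovered tiers fall back to the default priority.
        if tier_name is None:
            return self.default_tier_priority
        return self.tier_priorities.get(tier_name, self.default_tier_priority)
```

Because multiple tier names can map to the same priority, adding a new tier is a one-line dictionary change rather than another `if` chain like the removed method.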
src/rotator_library/providers/provider_interface.py CHANGED
@@ -1,9 +1,46 @@
  from abc import ABC, abstractmethod
- from typing import List, Dict, Any, Optional, AsyncGenerator, Union
  import httpx
  import litellm

  class ProviderInterface(ABC):
  """
  An interface for API provider-specific functionality, including model
@@ -12,6 +49,69 @@ class ProviderInterface(ABC):

  skip_cost_calculation: bool = False

  @abstractmethod
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
  """
@@ -81,28 +181,50 @@ class ProviderInterface(ABC):
  pass

  # [NEW] Credential Prioritization System
  def get_credential_priority(self, credential: str) -> Optional[int]:
  """
  Returns the priority level for a credential.
  Lower numbers = higher priority (1 is highest).
- Returns None if provider doesn't use priorities.

- This allows providers to auto-detect credential tiers (e.g., paid vs free)
- and ensure higher-tier credentials are always tried first.

  Args:
  credential: The credential identifier (API key or path)

  Returns:
- Priority level (1-10) or None if no priority system
-
- Example:
- For Gemini CLI:
- - Paid tier credentials: priority 1 (highest)
- - Free tier credentials: priority 2
- - Unknown tier: priority 10 (lowest)
  """
- return None

  def get_model_tier_requirement(self, model: str) -> Optional[int]:
  """
@@ -153,3 +275,274 @@ class ProviderInterface(ABC):
  Tier name string (e.g., "free-tier", "paid-tier") or None if unknown
  """
  return None

  from abc import ABC, abstractmethod
+ from dataclasses import dataclass
+ from typing import List, Dict, Any, Optional, AsyncGenerator, Union, FrozenSet
+ import os
  import httpx
  import litellm

+ # =============================================================================
+ # TIER & USAGE CONFIGURATION TYPES
+ # =============================================================================
+
+
+ @dataclass(frozen=True)
+ class UsageResetConfigDef:
+ """
+ Definition for usage reset configuration per tier type.
+
+ Providers define these as class attributes to specify how usage stats
+ should reset based on credential tier (paid vs free).
+
+ Attributes:
+ window_seconds: Duration of the usage tracking window in seconds.
+ mode: Either "credential" (one window per credential) or "per_model"
+ (separate window per model or model group).
+ description: Human-readable description for logging.
+ field_name: The key used in the usage data JSON structure.
+ Typically "models" for per_model mode, "daily" for credential mode.
+ """
+
+ window_seconds: int
+ mode: str # "credential" or "per_model"
+ description: str
+ field_name: str = "daily" # Default for backwards compatibility
+
+
+ # Type aliases for provider configuration
+ TierPriorityMap = Dict[str, int] # tier_name -> priority
+ UsageConfigKey = Union[FrozenSet[int], str] # frozenset of priorities OR "default"
+ UsageConfigMap = Dict[UsageConfigKey, UsageResetConfigDef] # priority_set -> config
+ QuotaGroupMap = Dict[str, List[str]] # group_name -> [models]
+
+
  class ProviderInterface(ABC):
  """
  An interface for API provider-specific functionality, including model

  skip_cost_calculation: bool = False

+ # Default rotation mode for this provider ("balanced" or "sequential")
+ # - "balanced": Rotate credentials to distribute load evenly
+ # - "sequential": Use one credential until exhausted, then switch to next
+ default_rotation_mode: str = "balanced"
+
+ # =========================================================================
+ # TIER CONFIGURATION - Override in subclass
+ # =========================================================================
+
+ # Provider name for env var lookups (e.g., "antigravity", "gemini_cli")
+ # Used for: QUOTA_GROUPS_{provider_env_name}_{GROUP}
+ provider_env_name: str = ""
+
+ # Tier name -> priority mapping (Single Source of Truth)
+ # Lower numbers = higher priority (1 is highest)
+ # Multiple tiers can map to the same priority
+ # Unknown tiers fall back to default_tier_priority
+ tier_priorities: TierPriorityMap = {}
+
+ # Default priority for tiers not in tier_priorities mapping
+ default_tier_priority: int = 10
+
+ # =========================================================================
+ # USAGE RESET CONFIGURATION - Override in subclass
+ # =========================================================================
+
+ # Usage reset configurations keyed by priority sets
+ # Keys: frozenset of priority values (e.g., frozenset({1, 2})) OR "default"
+ # The "default" key is used for any priority not matched by a frozenset
+ usage_reset_configs: UsageConfigMap = {}
+
+ # =========================================================================
+ # MODEL QUOTA GROUPS - Override in subclass
+ # =========================================================================
+
+ # Models that share quota/cooldown timing
+ # Can be overridden via env: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
+ model_quota_groups: QuotaGroupMap = {}
+
+ # Model usage weights for grouped usage calculation
+ # When calculating combined usage for quota groups, each model's usage
+ # is multiplied by its weight. This accounts for models that consume
+ # more quota per request (e.g., Opus uses more than Sonnet).
+ # Models not in the map default to weight 1.
+ # Example: {"claude-opus-4-5": 2} means Opus usage counts 2x
+ model_usage_weights: Dict[str, int] = {}
+
+ # =========================================================================
+ # PRIORITY CONCURRENCY MULTIPLIERS - Override in subclass
+ # =========================================================================
+
+ # Priority-based concurrency multipliers (universal, applies to all modes)
+ # Maps priority level -> multiplier
+ # Higher priority credentials (lower number) can have higher multipliers
+ # to allow more concurrent requests
+ # Example: {1: 5, 2: 3} means Priority 1 gets 5x, Priority 2 gets 3x
+ default_priority_multipliers: Dict[int, int] = {}
+
+ # Fallback multiplier for sequential mode when priority not in default_priority_multipliers
+ # This is used for lower-priority tiers in sequential mode to maintain some stickiness
+ # Default: 1 (no multiplier effect)
+ default_sequential_fallback_multiplier: int = 1
+
  @abstractmethod
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
  """

  pass

  # [NEW] Credential Prioritization System
+
+ # =========================================================================
+ # TIER RESOLUTION LOGIC (Centralized)
+ # =========================================================================
+
+ def _resolve_tier_priority(self, tier_name: Optional[str]) -> int:
+ """
+ Resolve priority for a tier name using provider's tier_priorities mapping.
+
+ Args:
+ tier_name: The tier name string (e.g., "free-tier", "standard-tier")
+
+ Returns:
+ Priority level from tier_priorities, or default_tier_priority if
+ tier_name is None or not found in the mapping.
+ """
+ if tier_name is None:
+ return self.default_tier_priority
+ return self.tier_priorities.get(tier_name, self.default_tier_priority)
+
  def get_credential_priority(self, credential: str) -> Optional[int]:
  """
  Returns the priority level for a credential.
  Lower numbers = higher priority (1 is highest).
+ Returns None if tier not yet discovered.

+ Uses the provider's tier_priorities mapping to resolve priority from
+ tier name. Unknown tiers fall back to default_tier_priority.
+
+ Subclasses should:
+ 1. Define tier_priorities dict with all known tier names
+ 2. Override get_credential_tier_name() for tier lookup
+ Do NOT override this method.

  Args:
  credential: The credential identifier (API key or path)

  Returns:
+ Priority level (1-10) or None if tier not yet discovered
  """
+ tier = self.get_credential_tier_name(credential)
+ if tier is None:
+ return None # Tier not yet discovered
+ return self._resolve_tier_priority(tier)

  def get_model_tier_requirement(self, model: str) -> Optional[int]:
  """

  Tier name string (e.g., "free-tier", "paid-tier") or None if unknown
  """
  return None
+
+ # =========================================================================
+ # Sequential Rotation Support
+ # =========================================================================
+
+ @classmethod
+ def get_rotation_mode(cls, provider_name: str) -> str:
+ """
+ Get the rotation mode for this provider.
+
+ Checks ROTATION_MODE_{PROVIDER} environment variable first,
+ then falls back to the class's default_rotation_mode.
+
+ Args:
+ provider_name: The provider name (e.g., "antigravity", "gemini_cli")
+
+ Returns:
+ "balanced" or "sequential"
+ """
+ env_key = f"ROTATION_MODE_{provider_name.upper()}"
+ return os.getenv(env_key, cls.default_rotation_mode)
+
+ @staticmethod
+ def parse_quota_error(
+ error: Exception, error_body: Optional[str] = None
+ ) -> Optional[Dict[str, Any]]:
+ """
+ Parse a quota/rate-limit error and extract structured information.
+
+ Providers should override this method to handle their specific error formats.
+ This allows the error_handler to use provider-specific parsing when available,
+ falling back to generic parsing otherwise.
+
+ Args:
+ error: The caught exception
+ error_body: Optional raw response body string
+
+ Returns:
+ None if not a parseable quota error, otherwise:
+ {
+ "retry_after": int, # seconds until quota resets
+ "reason": str, # e.g., "QUOTA_EXHAUSTED", "RATE_LIMITED"
+ "reset_timestamp": str | None, # ISO timestamp if available
+ "quota_reset_timestamp": float | None, # Unix timestamp for quota reset
+ }
+ """
+ return None # Default: no provider-specific parsing
+
+ # =========================================================================
+ # Per-Provider Usage Tracking Configuration
+ # =========================================================================
+
+ # =========================================================================
+ # USAGE RESET CONFIG LOGIC (Centralized)
+ # =========================================================================
+
+ def _find_usage_config_for_priority(
+ self, priority: int
+ ) -> Optional[UsageResetConfigDef]:
+ """
+ Find usage config that applies to a priority value.
+
+ Checks frozenset keys first (priority must be in the set),
+ then falls back to "default" key if no match found.
+
+ Args:
+ priority: The credential priority level
+
+ Returns:
+ UsageResetConfigDef if found, None otherwise
+ """
+ # First, check frozenset keys for explicit priority match
+ for key, config in self.usage_reset_configs.items():
+ if isinstance(key, frozenset) and priority in key:
+ return config
+
+ # Fall back to "default" key
+ return self.usage_reset_configs.get("default")
+
+ def _build_usage_reset_config(
+ self, tier_name: Optional[str]
+ ) -> Optional[Dict[str, Any]]:
+ """
+ Build usage reset configuration dict for a tier.
+
+ Resolves tier to priority, then finds matching usage config.
+ Returns None if provider doesn't define usage_reset_configs.
+
+ Args:
+ tier_name: The tier name string
+
+ Returns:
+ Usage config dict with window_seconds, mode, priority, description,
+ field_name, or None if no config applies
+ """
+ if not self.usage_reset_configs:
+ return None
+
+ priority = self._resolve_tier_priority(tier_name)
+ config = self._find_usage_config_for_priority(priority)
+
+ if config is None:
+ return None
+
+ return {
+ "window_seconds": config.window_seconds,
+ "mode": config.mode,
+ "priority": priority,
+ "description": config.description,
+ "field_name": config.field_name,
+ }
+
+ def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
+ """
+ Get provider-specific usage tracking configuration for a credential.
+
+ Uses the provider's usage_reset_configs class attribute to build
+ the configuration dict. Priority is auto-derived from tier.
+
+ Subclasses should define usage_reset_configs as a class attribute
+ instead of overriding this method. Only override get_credential_tier_name()
+ to provide the tier lookup mechanism.
+
+ The UsageManager will use this configuration to:
+ 1. Track usage per-model or per-credential based on mode
+ 2. Reset usage based on a rolling window OR quota exhausted timestamp
+ 3. Archive stats to "global" when the window/quota expires
+
+ Args:
+ credential: The credential identifier (API key or path)
+
+ Returns:
+ None to use default daily reset, otherwise a dict with:
+ {
+ "window_seconds": int, # Duration in seconds (e.g., 18000 for 5h)
+ "mode": str, # "credential" or "per_model"
+ "priority": int, # Priority level (auto-derived from tier)
+ "description": str, # Human-readable description (for logging)
+ }
+
+ Modes:
+ - "credential": One window per credential. Window starts from first
+ request of ANY model. All models reset together when window expires.
+ - "per_model": Separate window per model (or model group). Window starts
+ from first request of THAT model. Models reset independently unless
+ grouped. If a quota_exhausted error provides exact reset time, that
+ becomes the authoritative reset time for the model.
+ """
+ tier = self.get_credential_tier_name(credential)
+ return self._build_usage_reset_config(tier)
+
+ def get_default_usage_field_name(self) -> str:
+ """
+ Get the default usage tracking field name for this provider.
+
+ Providers can override this to use a custom field name for usage tracking
+ when no credential-specific config is available.
+
+ Returns:
+ Field name string (default: "daily")
+ """
+ return "daily"
+
+ # =========================================================================
+ # Model Quota Grouping
+ # =========================================================================
+
+ # =========================================================================
+ # QUOTA GROUPS LOGIC (Centralized)
+ # =========================================================================
+
+ def _get_effective_quota_groups(self) -> QuotaGroupMap:
+ """
+ Get quota groups with .env overrides applied.
+
+ Env format: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
+ Set empty string to disable a default group.
+ """
+ if not self.provider_env_name or not self.model_quota_groups:
+ return self.model_quota_groups
+
+ result: QuotaGroupMap = {}
+
+ for group_name, default_models in self.model_quota_groups.items():
+ env_key = (
+ f"QUOTA_GROUPS_{self.provider_env_name.upper()}_{group_name.upper()}"
+ )
+ env_value = os.getenv(env_key)
+
+ if env_value is not None:
+ # Env override present
+ if env_value.strip():
+ # Parse comma-separated models
+ result[group_name] = [
+ m.strip() for m in env_value.split(",") if m.strip()
+ ]
+ # Empty string = group disabled, don't add to result
+ else:
+ # Use default
+ result[group_name] = list(default_models)
+
+ return result
+
+ def _find_model_quota_group(self, model: str) -> Optional[str]:
+ """Find which quota group a model belongs to."""
+ groups = self._get_effective_quota_groups()
+ for group_name, models in groups.items():
+ if model in models:
+ return group_name
+ return None
+
+ def _get_quota_group_models(self, group: str) -> List[str]:
+ """Get all models in a quota group."""
+ groups = self._get_effective_quota_groups()
+ return groups.get(group, [])
+
+ def get_model_quota_group(self, model: str) -> Optional[str]:
+ """
+ Returns the quota group name for a model, or None if not grouped.
+
+ Uses the provider's model_quota_groups class attribute with .env overrides
+ via QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2".
+
+ Models in the same quota group share cooldown timing - when one model
+ hits a quota exhausted error, all models in the group get the same
+ reset timestamp. They also reset (archive stats) together.
+
+ Subclasses should define model_quota_groups as a class attribute
+ instead of overriding this method.
+
+ Args:
+ model: Model name (with or without provider prefix)
+
+ Returns:
+ Group name string (e.g., "claude") or None if model is not grouped
+ """
+ # Strip provider prefix if present
+ clean_model = model.split("/")[-1] if "/" in model else model
+ return self._find_model_quota_group(clean_model)
+
+ def get_models_in_quota_group(self, group: str) -> List[str]:
+ """
+ Returns all model names that belong to a quota group.
+
+ Uses the provider's model_quota_groups class attribute with .env overrides.
+
+ Args:
+ group: Group name (e.g., "claude")
+
+ Returns:
+ List of model names (WITHOUT provider prefix) in the group.
+ Empty list if group doesn't exist.
+ """
+ return self._get_quota_group_models(group)
+
+ def get_model_usage_weight(self, model: str) -> int:
+ """
+ Returns the usage weight for a model when calculating grouped usage.
+
+ Models with higher weights contribute more to the combined group usage.
+ This accounts for models that consume more quota per request.
+
+ Args:
+ model: Model name (with or without provider prefix)
+
+ Returns:
+ Weight multiplier (default 1 if not configured)
+ """
+ # Strip provider prefix if present
+ clean_model = model.split("/")[-1] if "/" in model else model
+ return self.model_usage_weights.get(clean_model, 1)
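The `_get_effective_quota_groups()` method added above layers `QUOTA_GROUPS_{PROVIDER}_{GROUP}` overrides on top of class defaults. A standalone sketch of that parsing rule, pulled out of the class for illustration (the free function name is hypothetical):

```python
import os
from typing import Dict, List

def effective_quota_groups(
    provider_env_name: str, defaults: Dict[str, List[str]]
) -> Dict[str, List[str]]:
    result: Dict[str, List[str]] = {}
    for group, models in defaults.items():
        env_key = f"QUOTA_GROUPS_{provider_env_name.upper()}_{group.upper()}"
        env_value = os.getenv(env_key)
        if env_value is None:
            result[group] = list(models)  # no override: keep the default
        elif env_value.strip():
            # comma-separated override, e.g. "model-a, model-b"
            result[group] = [m.strip() for m in env_value.split(",") if m.strip()]
        # empty string: group disabled, omitted from the result
    return result
```

The three-way distinction matters: an unset variable keeps the default, a non-empty value replaces it, and an explicitly empty value disables the group entirely.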
src/rotator_library/usage_manager.py CHANGED
@@ -5,7 +5,7 @@ import logging
  import asyncio
  import random
  from datetime import date, datetime, timezone, time as dt_time
- from typing import Any, Dict, List, Optional, Set
  import aiofiles
  import litellm

@@ -42,6 +42,10 @@ class UsageManager:

  This ensures lower-usage credentials are preferred while tolerance controls how much
  randomness is introduced into the selection process.
  """

  def __init__(
@@ -49,6 +53,13 @@
  file_path: str = "key_usage.json",
  daily_reset_time_utc: Optional[str] = "03:00",
  rotation_tolerance: float = 0.0,
  ):
  """
  Initialize the UsageManager.
@@ -60,9 +71,28 @@
  - 0.0: Deterministic, least-used credential always selected
  - tolerance = 2.0 - 4.0 (default, recommended): Balanced randomness, can pick credentials within 2 uses of max
  - 5.0+: High randomness, more unpredictable selection patterns
  """
  self.file_path = file_path
  self.rotation_tolerance = rotation_tolerance
  self.key_states: Dict[str, Dict[str, Any]] = {}

  self._data_lock = asyncio.Lock()
@@ -81,6 +111,426 @@
  else:
  self.daily_reset_time_utc = None

  async def _lazy_init(self):
  """Initializes the usage data by loading it from the file asynchronously."""
  async with self._init_lock:
@@ -107,85 +557,412 @@
  if self._usage_data is None:
  return
  async with self._data_lock:
  async with aiofiles.open(self.file_path, "w") as f:
  await f.write(json.dumps(self._usage_data, indent=2))

  async def _reset_daily_stats_if_needed(self):
- """Checks if daily stats need to be reset for any key."""
- if self._usage_data is None or not self.daily_reset_time_utc:
  return

  now_utc = datetime.now(timezone.utc)
  today_str = now_utc.date().isoformat()
  needs_saving = False

  for key, data in self._usage_data.items():
- last_reset_str = data.get("last_daily_reset", "")
-
- if last_reset_str != today_str:
- last_reset_dt = None
- if last_reset_str:
- # Ensure the parsed datetime is timezone-aware (UTC)
- last_reset_dt = datetime.fromisoformat(last_reset_str).replace(
- tzinfo=timezone.utc
- )

- # Determine the reset threshold for today
- reset_threshold_today = datetime.combine(
- now_utc.date(), self.daily_reset_time_utc
- )

- if (
- last_reset_dt is None
- or last_reset_dt < reset_threshold_today <= now_utc
- ):
- lib_logger.debug(
- f"Performing daily reset for key {mask_credential(key)}"
  )
- needs_saving = True
-
- # Reset cooldowns
- data["model_cooldowns"] = {}
- data["key_cooldown_until"] = None
-
- # Reset consecutive failures
- if "failures" in data:
- data["failures"] = {}
-
- # Archive global stats from the previous day's 'daily'
- daily_data = data.get("daily", {})
- if daily_data:
- global_data = data.setdefault("global", {"models": {}})
- for model, stats in daily_data.get("models", {}).items():
- global_model_stats = global_data["models"].setdefault(
- model,
- {
- "success_count": 0,
- "prompt_tokens": 0,
- "completion_tokens": 0,
- "approx_cost": 0.0,
- },
- )
- global_model_stats["success_count"] += stats.get(
- "success_count", 0
- )
- global_model_stats["prompt_tokens"] += stats.get(
- "prompt_tokens", 0
- )
- global_model_stats["completion_tokens"] += stats.get(
- "completion_tokens", 0
- )
- global_model_stats["approx_cost"] += stats.get(
- "approx_cost", 0.0
- )
-
- # Reset daily stats
- data["daily"] = {"date": today_str, "models": {}}
- data["last_daily_reset"] = today_str

  if needs_saving:
  await self._save_usage()
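The removed `_reset_daily_stats_if_needed` body above gates the reset on `last_reset_dt is None or last_reset_dt < reset_threshold_today <= now_utc`. A small self-contained sketch of that condition, with a hypothetical helper name and timezone-aware datetimes throughout:

```python
from datetime import datetime, time, timezone
from typing import Optional

def needs_daily_reset(
    last_reset: Optional[datetime], now: datetime, reset_at: time
) -> bool:
    # Today's reset threshold at the configured UTC wall-clock time.
    threshold = datetime.combine(now.date(), reset_at, tzinfo=timezone.utc)
    # Reset if we have never reset, or if the last reset predates the
    # threshold and the threshold has already passed.
    return last_reset is None or last_reset < threshold <= now
```

The chained comparison encodes both requirements at once: the previous reset must be older than today's threshold, and the threshold must already be in the past.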
 
189
  def _initialize_key_states(self, keys: List[str]):
190
  """Initializes state tracking for all provided keys if not already present."""
191
  for key in keys:
@@ -306,12 +1083,8 @@ class UsageManager:
306
  priority = credential_priorities.get(key, 999)
307
 
308
  # Get usage count for load balancing within priority groups
309
- usage_count = (
310
- key_data.get("daily", {})
311
- .get("models", {})
312
- .get(model, {})
313
- .get("success_count", 0)
314
- )
315
 
316
  # Group by priority
317
  if priority not in priority_groups:
@@ -324,6 +1097,16 @@ class UsageManager:
324
  for priority_level in sorted_priorities:
325
  keys_in_priority = priority_groups[priority_level]
326
 
 
 
 
 
 
 
 
 
 
 
327
  # Within each priority group, use existing tier1/tier2 logic
328
  tier1_keys, tier2_keys = [], []
329
  for key, usage_count in keys_in_priority:
@@ -333,18 +1116,27 @@ class UsageManager:
                 if not key_state["models_in_use"]:
                     tier1_keys.append((key, usage_count))
                 # Tier 2: Keys that can accept more concurrent requests
-                elif key_state["models_in_use"].get(model, 0) < max_concurrent:
                     tier2_keys.append((key, usage_count))
 
-            # Apply weighted random selection or deterministic sorting
-            selection_method = (
-                "weighted-random"
-                if self.rotation_tolerance > 0
-                else "least-used"
-            )
-
-            if self.rotation_tolerance > 0:
-                # Weighted random selection within each tier
                 if tier1_keys:
                     selected_key = self._select_weighted_random(
                         tier1_keys, self.rotation_tolerance
@@ -361,6 +1153,7 @@ class UsageManager:
                     ]
                 else:
                     # Deterministic: sort by usage within each tier
                     tier1_keys.sort(key=lambda x: x[1])
                     tier2_keys.sort(key=lambda x: x[1])
 
@@ -386,7 +1179,7 @@ class UsageManager:
             state = self.key_states[key]
             async with state["lock"]:
                 current_count = state["models_in_use"].get(model, 0)
-                if current_count < max_concurrent:
                     state["models_in_use"][model] = current_count + 1
                     tier_name = (
                         credential_tier_names.get(key, "unknown")
@@ -395,7 +1188,7 @@ class UsageManager:
                     )
                     lib_logger.info(
                         f"Acquired key {mask_credential(key)} for model {model} "
-                        f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
                     )
                     return key
 
 
@@ -424,6 +1217,19 @@ class UsageManager:
 
         else:
             # Original logic when no priorities specified
             tier1_keys, tier2_keys = [], []
 
             # First, filter the list of available keys to exclude any on cooldown.
@@ -437,28 +1243,35 @@ class UsageManager:
                     continue
 
                 # Prioritize keys based on their current usage to ensure load balancing.
-                usage_count = (
-                    key_data.get("daily", {})
-                    .get("models", {})
-                    .get(model, {})
-                    .get("success_count", 0)
-                )
                 key_state = self.key_states[key]
 
                 # Tier 1: Completely idle keys (preferred).
                 if not key_state["models_in_use"]:
                     tier1_keys.append((key, usage_count))
                 # Tier 2: Keys that can accept more concurrent requests for this model.
-                elif key_state["models_in_use"].get(model, 0) < max_concurrent:
                     tier2_keys.append((key, usage_count))
 
-            # Apply weighted random selection or deterministic sorting
-            selection_method = (
-                "weighted-random" if self.rotation_tolerance > 0 else "least-used"
-            )
-
-            if self.rotation_tolerance > 0:
-                # Weighted random selection within each tier
             if tier1_keys:
                 selected_key = self._select_weighted_random(
                     tier1_keys, self.rotation_tolerance
@@ -475,6 +1288,7 @@ class UsageManager:
                 ]
             else:
                 # Deterministic: sort by usage within each tier
                 tier1_keys.sort(key=lambda x: x[1])
                 tier2_keys.sort(key=lambda x: x[1])
 
@@ -501,7 +1315,7 @@ class UsageManager:
             state = self.key_states[key]
             async with state["lock"]:
                 current_count = state["models_in_use"].get(model, 0)
-                if current_count < max_concurrent:
                     state["models_in_use"][model] = current_count + 1
                     tier_name = (
                         credential_tier_names.get(key)
@@ -511,7 +1325,7 @@ class UsageManager:
                     tier_info = f"tier: {tier_name}, " if tier_name else ""
                     lib_logger.info(
                         f"Acquired key {mask_credential(key)} for model {model} "
-                        f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
                     )
                     return key
 
 
@@ -585,70 +1399,131 @@ class UsageManager:
         """
         Records a successful API call, resetting failure counters.
         It safely handles cases where token usage data is not available.
         """
         await self._lazy_init()
         async with self._data_lock:
             today_utc_str = datetime.now(timezone.utc).date().isoformat()
-            key_data = self._usage_data.setdefault(
-                key,
-                {
-                    "daily": {"date": today_utc_str, "models": {}},
-                    "global": {"models": {}},
-                    "model_cooldowns": {},
-                    "failures": {},
-                },
             )
 
-            # If the key is new, ensure its reset date is initialized to prevent an immediate reset.
-            if "last_daily_reset" not in key_data:
-                key_data["last_daily_reset"] = today_utc_str
 
-            # Always record a success and reset failures
             model_failures = key_data.setdefault("failures", {}).setdefault(model, {})
             model_failures["consecutive_failures"] = 0
             if model in key_data.get("model_cooldowns", {}):
                 del key_data["model_cooldowns"][model]
 
-            daily_model_data = key_data["daily"]["models"].setdefault(
-                model,
-                {
-                    "success_count": 0,
-                    "prompt_tokens": 0,
-                    "completion_tokens": 0,
-                    "approx_cost": 0.0,
-                },
-            )
-            daily_model_data["success_count"] += 1
-
-            # Safely attempt to record token and cost usage
             if (
                 completion_response
                 and hasattr(completion_response, "usage")
                 and completion_response.usage
             ):
                 usage = completion_response.usage
-                daily_model_data["prompt_tokens"] += usage.prompt_tokens
-                daily_model_data["completion_tokens"] += getattr(
                     usage, "completion_tokens", 0
-                )  # Not present in embedding responses
                 lib_logger.info(
                     f"Recorded usage from response object for key {mask_credential(key)}"
                 )
                 try:
                     provider_name = model.split("/")[0]
-                    provider_plugin = PROVIDER_PLUGINS.get(provider_name)
 
-                    # Check class attribute directly - no need to instantiate
-                    if provider_plugin and getattr(
-                        provider_plugin, "skip_cost_calculation", False
                     ):
                         lib_logger.debug(
                             f"Skipping cost calculation for provider '{provider_name}' (custom provider)."
                         )
                     else:
-                        # Differentiate cost calculation based on response type
                         if isinstance(completion_response, litellm.EmbeddingResponse):
-                            # Manually calculate cost for embeddings
                             model_info = litellm.get_model_info(model)
                             input_cost = model_info.get("input_cost_per_token")
                             if input_cost:
@@ -663,7 +1538,7 @@ class UsageManager:
                             )
 
                         if cost is not None:
-                            daily_model_data["approx_cost"] += cost
                 except Exception as e:
                     lib_logger.warning(
                         f"Could not calculate cost for model {model}: {e}"
@@ -671,14 +1546,13 @@ class UsageManager:
             elif isinstance(completion_response, asyncio.Future) or hasattr(
                 completion_response, "__aiter__"
             ):
-                # This is an unconsumed stream object. Do not log a warning, as usage will be recorded from the chunks.
-                pass
             else:
                 lib_logger.warning(
                     f"No usage data found in completion response for model {model}. Recording success without token count."
                 )
 
-            key_data["last_used_ts"] = time.time()
 
             await self._save_usage()
 
@@ -689,7 +1563,13 @@ class UsageManager:
         classified_error: ClassifiedError,
         increment_consecutive_failures: bool = True,
     ):
-        """Records a failure and applies cooldowns based on an escalating backoff strategy.
 
         Args:
             key: The API key or credential identifier
@@ -700,17 +1580,36 @@ class UsageManager:
         """
         await self._lazy_init()
         async with self._data_lock:
             today_utc_str = datetime.now(timezone.utc).date().isoformat()
-            key_data = self._usage_data.setdefault(
-                key,
-                {
-                    "daily": {"date": today_utc_str, "models": {}},
-                    "global": {"models": {}},
-                    "model_cooldowns": {},
-                    "failures": {},
-                },
             )
 
             # Provider-level errors (transient issues) should not count against the key
             provider_level_errors = {"server_error", "api_connection"}
 
@@ -722,22 +1621,94 @@ class UsageManager:
 
             # Calculate cooldown duration based on error type
             cooldown_seconds = None
 
-            if classified_error.error_type in ["rate_limit", "quota_exceeded"]:
-                # Rate limit / Quota errors: use retry_after if available, otherwise default to 60s
                 cooldown_seconds = classified_error.retry_after or 60
                 lib_logger.info(
-                    f"Rate limit error on key {mask_credential(key)} for model {model}. "
-                    f"Using {'provided' if classified_error.retry_after else 'default'} retry_after: {cooldown_seconds}s"
                 )
             elif classified_error.error_type == "authentication":
                 # Apply a 5-minute key-level lockout for auth errors
-                key_data["key_cooldown_until"] = time.time() + 300
                 lib_logger.warning(
                     f"Authentication error on key {mask_credential(key)}. Applying 5-minute key-level lockout."
                 )
-                # Auth errors still use escalating backoff for the specific model
-                cooldown_seconds = 300  # 5 minutes for model cooldown
 
             # If we should increment failures, calculate escalating backoff
             if should_increment:
@@ -751,35 +1722,27 @@ class UsageManager:
                 # If cooldown wasn't set by specific error type, use escalating backoff
                 if cooldown_seconds is None:
                     backoff_tiers = {1: 10, 2: 30, 3: 60, 4: 120}
-                    cooldown_seconds = backoff_tiers.get(
-                        count, 7200
-                    )  # Default to 2 hours for "spent" keys
                 lib_logger.warning(
                     f"Failure #{count} for key {mask_credential(key)} with model {model}. "
-                    f"Error type: {classified_error.error_type}"
                 )
             else:
                 # Provider-level errors: apply short cooldown but don't count against key
                 if cooldown_seconds is None:
-                    cooldown_seconds = 30  # 30s cooldown for provider issues
                 lib_logger.info(
-                    f"Provider-level error ({classified_error.error_type}) for key {mask_credential(key)} with model {model}. "
-                    f"NOT incrementing consecutive failures. Applying {cooldown_seconds}s cooldown."
                 )
 
-            # Apply the cooldown
-            model_cooldowns = key_data.setdefault("model_cooldowns", {})
-            model_cooldowns[model] = time.time() + cooldown_seconds
-            lib_logger.warning(
-                f"Cooldown applied for key {mask_credential(key)} with model {model}: {cooldown_seconds}s. "
-                f"Error type: {classified_error.error_type}"
-            )
-
             # Check for key-level lockout condition
             await self._check_key_lockout(key, key_data)
 
             key_data["last_failure"] = {
-                "timestamp": time.time(),
                 "model": model,
                 "error": str(classified_error.original_exception),
             }
 
 import asyncio
 import random
 from datetime import date, datetime, timezone, time as dt_time
+from typing import Any, Dict, List, Optional, Set, Tuple
 import aiofiles
 import litellm
 
 
     This ensures lower-usage credentials are preferred while tolerance controls how much
     randomness is introduced into the selection process.
+
+    Additionally, providers can specify a rotation mode:
+    - "balanced" (default): Rotate credentials to distribute load evenly
+    - "sequential": Use one credential until exhausted (preserves caching)
     """
 
     def __init__(
 
         file_path: str = "key_usage.json",
         daily_reset_time_utc: Optional[str] = "03:00",
         rotation_tolerance: float = 0.0,
+        provider_rotation_modes: Optional[Dict[str, str]] = None,
+        provider_plugins: Optional[Dict[str, Any]] = None,
+        priority_multipliers: Optional[Dict[str, Dict[int, int]]] = None,
+        priority_multipliers_by_mode: Optional[
+            Dict[str, Dict[str, Dict[int, int]]]
+        ] = None,
+        sequential_fallback_multipliers: Optional[Dict[str, int]] = None,
     ):
         """
         Initialize the UsageManager.
 
                 - 0.0: Deterministic, least-used credential always selected
                 - tolerance = 2.0 - 4.0 (default, recommended): Balanced randomness, can pick credentials within 2 uses of max
                 - 5.0+: High randomness, more unpredictable selection patterns
+            provider_rotation_modes: Dict mapping provider names to rotation modes.
+                - "balanced": Rotate credentials to distribute load evenly (default)
+                - "sequential": Use one credential until exhausted (preserves caching)
+            provider_plugins: Dict mapping provider names to provider plugin instances.
+                Used for per-provider usage reset configuration (window durations, field names).
+            priority_multipliers: Dict mapping provider -> priority -> multiplier.
+                Universal multipliers that apply regardless of rotation mode.
+                Example: {"antigravity": {1: 5, 2: 3}}
+            priority_multipliers_by_mode: Dict mapping provider -> mode -> priority -> multiplier.
+                Mode-specific overrides. Example: {"antigravity": {"balanced": {3: 1}}}
+            sequential_fallback_multipliers: Dict mapping provider -> fallback multiplier.
+                Used in sequential mode when priority not in priority_multipliers.
+                Example: {"antigravity": 2}
         """
         self.file_path = file_path
         self.rotation_tolerance = rotation_tolerance
+        self.provider_rotation_modes = provider_rotation_modes or {}
+        self.provider_plugins = provider_plugins or PROVIDER_PLUGINS
+        self.priority_multipliers = priority_multipliers or {}
+        self.priority_multipliers_by_mode = priority_multipliers_by_mode or {}
+        self.sequential_fallback_multipliers = sequential_fallback_multipliers or {}
+        self._provider_instances: Dict[str, Any] = {}  # Cache for provider instances
         self.key_states: Dict[str, Dict[str, Any]] = {}
 
         self._data_lock = asyncio.Lock()
 
         else:
             self.daily_reset_time_utc = None
 
 
+    def _get_rotation_mode(self, provider: str) -> str:
+        """
+        Get the rotation mode for a provider.
+
+        Args:
+            provider: Provider name (e.g., "antigravity", "gemini_cli")
+
+        Returns:
+            "balanced" or "sequential"
+        """
+        return self.provider_rotation_modes.get(provider, "balanced")
+
+    def _get_priority_multiplier(
+        self, provider: str, priority: int, rotation_mode: str
+    ) -> int:
+        """
+        Get the concurrency multiplier for a provider/priority/mode combination.
+
+        Lookup order:
+        1. Mode-specific tier override: priority_multipliers_by_mode[provider][mode][priority]
+        2. Universal tier multiplier: priority_multipliers[provider][priority]
+        3. Sequential fallback (if mode is sequential): sequential_fallback_multipliers[provider]
+        4. Global default: 1 (no multiplier effect)
+
+        Args:
+            provider: Provider name (e.g., "antigravity")
+            priority: Priority level (1 = highest priority)
+            rotation_mode: Current rotation mode ("sequential" or "balanced")
+
+        Returns:
+            Multiplier value
+        """
+        provider_lower = provider.lower()
+
+        # 1. Check mode-specific override
+        if provider_lower in self.priority_multipliers_by_mode:
+            mode_multipliers = self.priority_multipliers_by_mode[provider_lower]
+            if rotation_mode in mode_multipliers:
+                if priority in mode_multipliers[rotation_mode]:
+                    return mode_multipliers[rotation_mode][priority]
+
+        # 2. Check universal tier multiplier
+        if provider_lower in self.priority_multipliers:
+            if priority in self.priority_multipliers[provider_lower]:
+                return self.priority_multipliers[provider_lower][priority]
+
+        # 3. Sequential fallback (only for sequential mode)
+        if rotation_mode == "sequential":
+            if provider_lower in self.sequential_fallback_multipliers:
+                return self.sequential_fallback_multipliers[provider_lower]
+
+        # 4. Global default
+        return 1
+
+
168
+ def _get_provider_from_credential(self, credential: str) -> Optional[str]:
169
+ """
170
+ Extract provider name from credential path or identifier.
171
+
172
+ Supports multiple credential formats:
173
+ - OAuth: "oauth_creds/antigravity_oauth_15.json" -> "antigravity"
174
+ - OAuth: "C:\\...\\oauth_creds\\gemini_cli_oauth_1.json" -> "gemini_cli"
175
+ - API key style: stored with provider prefix metadata
176
+
177
+ Args:
178
+ credential: The credential identifier (path or key)
179
+
180
+ Returns:
181
+ Provider name string or None if cannot be determined
182
+ """
183
+ import re
184
+
185
+ # Normalize path separators
186
+ normalized = credential.replace("\\", "/")
187
+
188
+ # Pattern: {provider}_oauth_{number}.json
189
+ match = re.search(r"/([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
190
+ if match:
191
+ return match.group(1).lower()
192
+
193
+ # Pattern: oauth_creds/{provider}_...
194
+ match = re.search(r"oauth_creds/([a-z_]+)_", normalized, re.IGNORECASE)
195
+ if match:
196
+ return match.group(1).lower()
197
+
198
+ return None
199
+
200
+ def _get_provider_instance(self, provider: str) -> Optional[Any]:
201
+ """
202
+ Get or create a provider plugin instance.
203
+
204
+ Args:
205
+ provider: The provider name
206
+
207
+ Returns:
208
+ Provider plugin instance or None
209
+ """
210
+ if not provider:
211
+ return None
212
+
213
+ plugin_class = self.provider_plugins.get(provider)
214
+ if not plugin_class:
215
+ return None
216
+
217
+ # Get or create provider instance from cache
218
+ if provider not in self._provider_instances:
219
+ # Instantiate the plugin if it's a class, or use it directly if already an instance
220
+ if isinstance(plugin_class, type):
221
+ self._provider_instances[provider] = plugin_class()
222
+ else:
223
+ self._provider_instances[provider] = plugin_class
224
+
225
+ return self._provider_instances[provider]
226
+
227
+ def _get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
228
+ """
229
+ Get the usage reset configuration for a credential from its provider plugin.
230
+
231
+ Args:
232
+ credential: The credential identifier
233
+
234
+ Returns:
235
+ Configuration dict with window_seconds, field_name, etc.
236
+ or None to use default daily reset.
237
+ """
238
+ provider = self._get_provider_from_credential(credential)
239
+ plugin_instance = self._get_provider_instance(provider)
240
+
241
+ if plugin_instance and hasattr(plugin_instance, "get_usage_reset_config"):
242
+ return plugin_instance.get_usage_reset_config(credential)
243
+
244
+ return None
245
+
246
+ def _get_reset_mode(self, credential: str) -> str:
247
+ """
248
+ Get the reset mode for a credential: 'credential' or 'per_model'.
249
+
250
+ Args:
251
+ credential: The credential identifier
252
+
253
+ Returns:
254
+ "per_model" or "credential" (default)
255
+ """
256
+ config = self._get_usage_reset_config(credential)
257
+ return config.get("mode", "credential") if config else "credential"
258
+
259
+ def _get_model_quota_group(self, credential: str, model: str) -> Optional[str]:
260
+ """
261
+ Get the quota group for a model, if the provider defines one.
262
+
263
+ Args:
264
+ credential: The credential identifier
265
+ model: Model name (with or without provider prefix)
266
+
267
+ Returns:
268
+ Group name (e.g., "claude") or None if not grouped
269
+ """
270
+ provider = self._get_provider_from_credential(credential)
271
+ plugin_instance = self._get_provider_instance(provider)
272
+
273
+ if plugin_instance and hasattr(plugin_instance, "get_model_quota_group"):
274
+ return plugin_instance.get_model_quota_group(model)
275
+
276
+ return None
277
+
278
+ def _get_grouped_models(self, credential: str, group: str) -> List[str]:
279
+ """
280
+ Get all model names in a quota group (with provider prefix).
281
+
282
+ Args:
283
+ credential: The credential identifier
284
+ group: Group name (e.g., "claude")
285
+
286
+ Returns:
287
+ List of full model names (e.g., ["antigravity/claude-opus-4-5", ...])
288
+ """
289
+ provider = self._get_provider_from_credential(credential)
290
+ plugin_instance = self._get_provider_instance(provider)
291
+
292
+ if plugin_instance and hasattr(plugin_instance, "get_models_in_quota_group"):
293
+ models = plugin_instance.get_models_in_quota_group(group)
294
+ # Add provider prefix
295
+ return [f"{provider}/{m}" for m in models]
296
+
297
+ return []
298
+
299
+ def _get_model_usage_weight(self, credential: str, model: str) -> int:
300
+ """
301
+ Get the usage weight for a model when calculating grouped usage.
302
+
303
+ Args:
304
+ credential: The credential identifier
305
+ model: Model name (with or without provider prefix)
306
+
307
+ Returns:
308
+ Weight multiplier (default 1 if not configured)
309
+ """
310
+ provider = self._get_provider_from_credential(credential)
311
+ plugin_instance = self._get_provider_instance(provider)
312
+
313
+ if plugin_instance and hasattr(plugin_instance, "get_model_usage_weight"):
314
+ return plugin_instance.get_model_usage_weight(model)
315
+
316
+ return 1
317
+
318
+ def _get_grouped_usage_count(self, key: str, model: str) -> int:
319
+ """
320
+ Get usage count for credential selection, considering quota groups.
321
+
322
+ If the model belongs to a quota group, returns the weighted combined usage
323
+ across all models in the group. Otherwise returns individual model usage.
324
+
325
+ Weights are applied per-model to account for models that consume more quota
326
+ per request (e.g., Opus might count 2x compared to Sonnet).
327
+
328
+ Args:
329
+ key: Credential identifier
330
+ model: Model name (with provider prefix, e.g., "antigravity/claude-sonnet-4-5")
331
+
332
+ Returns:
333
+ Weighted combined usage if grouped, otherwise individual model usage
334
+ """
335
+ # Check if model is in a quota group
336
+ group = self._get_model_quota_group(key, model)
337
+
338
+ if group:
339
+ # Get all models in the group
340
+ grouped_models = self._get_grouped_models(key, group)
341
+
342
+ # Sum weighted usage across all models in the group
343
+ total_weighted_usage = 0
344
+ for grouped_model in grouped_models:
345
+ usage = self._get_usage_count(key, grouped_model)
346
+ weight = self._get_model_usage_weight(key, grouped_model)
347
+ total_weighted_usage += usage * weight
348
+ return total_weighted_usage
349
+
350
+ # Not grouped - return individual model usage (no weight applied)
351
+ return self._get_usage_count(key, model)
352
+
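The weighted-group arithmetic reduces to a single sum. A standalone sketch (hypothetical function; the example weight of 2 for Opus is illustrative, matching the "Opus might count 2x" note in the docstring):

```python
def grouped_usage(usage, weights, group_models):
    """Weighted combined usage across all models in a quota group."""
    # Models without an explicit weight count at weight 1
    return sum(usage.get(m, 0) * weights.get(m, 1) for m in group_models)

group = ["antigravity/claude-opus-4-5", "antigravity/claude-sonnet-4-5"]
usage = {"antigravity/claude-opus-4-5": 3, "antigravity/claude-sonnet-4-5": 10}
weights = {"antigravity/claude-opus-4-5": 2}  # hypothetical: Opus counts double
```

Here `grouped_usage(usage, weights, group)` is 3*2 + 10*1 = 16, so three Opus calls push the credential down the rotation as far as six Sonnet calls would.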
+    def _get_usage_field_name(self, credential: str) -> str:
+        """
+        Get the usage tracking field name for a credential.
+
+        Returns the provider-specific field name if configured,
+        otherwise falls back to "daily".
+
+        Args:
+            credential: The credential identifier
+
+        Returns:
+            Field name string (e.g., "5h_window", "weekly", "daily")
+        """
+        config = self._get_usage_reset_config(credential)
+        if config and "field_name" in config:
+            return config["field_name"]
+
+        # Check provider default
+        provider = self._get_provider_from_credential(credential)
+        plugin_instance = self._get_provider_instance(provider)
+
+        if plugin_instance and hasattr(plugin_instance, "get_default_usage_field_name"):
+            return plugin_instance.get_default_usage_field_name()
+
+        return "daily"
+
+    def _get_usage_count(self, key: str, model: str) -> int:
+        """
+        Get the current usage count for a model from the appropriate usage structure.
+
+        Supports both:
+        - New per-model structure: {"models": {"model_name": {"success_count": N, ...}}}
+        - Legacy structure: {"daily": {"models": {"model_name": {"success_count": N, ...}}}}
+
+        Args:
+            key: Credential identifier
+            model: Model name
+
+        Returns:
+            Usage count (success_count) for the model in the current window/period
+        """
+        if self._usage_data is None:
+            return 0
+
+        key_data = self._usage_data.get(key, {})
+        reset_mode = self._get_reset_mode(key)
+
+        if reset_mode == "per_model":
+            # New per-model structure: key_data["models"][model]["success_count"]
+            return key_data.get("models", {}).get(model, {}).get("success_count", 0)
+        else:
+            # Legacy structure: key_data["daily"]["models"][model]["success_count"]
+            return (
+                key_data.get("daily", {})
+                .get("models", {})
+                .get(model, {})
+                .get("success_count", 0)
+            )
+
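The two supported layouts can be read with one helper. A standalone sketch (hypothetical function taking the per-credential dict directly, instead of resolving the reset mode from a plugin):

```python
def usage_count(key_data, model, reset_mode):
    """Read success_count from either usage-data layout."""
    if reset_mode == "per_model":
        # New layout: {"models": {model: {"success_count": N, ...}}}
        return key_data.get("models", {}).get(model, {}).get("success_count", 0)
    # Legacy layout: {"daily": {"models": {model: {"success_count": N, ...}}}}
    return (
        key_data.get("daily", {})
        .get("models", {})
        .get(model, {})
        .get("success_count", 0)
    )
```

The chained `.get(..., {})` calls make an absent key, model, or layout level read as zero usage rather than raising `KeyError`.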
+    # =========================================================================
+    # TIMESTAMP FORMATTING HELPERS
+    # =========================================================================
+
+    def _format_timestamp_local(self, ts: Optional[float]) -> Optional[str]:
+        """
+        Format Unix timestamp as local time string with timezone offset.
+
+        Args:
+            ts: Unix timestamp or None
+
+        Returns:
+            Formatted string like "2025-12-07 14:30:17 +0100" or None
+        """
+        if ts is None:
+            return None
+        try:
+            dt = datetime.fromtimestamp(ts).astimezone()  # Local timezone
+            # Use UTC offset for conciseness (works on all platforms)
+            return dt.strftime("%Y-%m-%d %H:%M:%S %z")
+        except (OSError, ValueError, OverflowError):
+            return None
+
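The formatting logic is pure apart from the system timezone, so it can be sketched and checked against the expected shape of the string (the exact offset depends on the machine's locale, so no fixed output is asserted):

```python
import re
from datetime import datetime

def format_timestamp_local(ts):
    """Render a Unix timestamp as 'YYYY-MM-DD HH:MM:SS +HHMM' in local time."""
    if ts is None:
        return None
    try:
        # astimezone() with no argument attaches the local timezone
        dt = datetime.fromtimestamp(ts).astimezone()
        return dt.strftime("%Y-%m-%d %H:%M:%S %z")
    except (OSError, ValueError, OverflowError):
        return None
```

`%z` renders the numeric UTC offset (e.g. `+0100`), which avoids platform-dependent timezone-name formatting.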
+    def _add_readable_timestamps(self, data: Dict) -> Dict:
+        """
+        Add human-readable timestamp fields to usage data before saving.
+
+        Adds 'window_started' and 'quota_resets' fields derived from
+        Unix timestamps for easier debugging and monitoring.
+
+        Args:
+            data: The usage data dict to enhance
+
+        Returns:
+            The same dict with readable timestamp fields added
+        """
+        for key, key_data in data.items():
+            # Handle per-model structure
+            models = key_data.get("models", {})
+            for model_name, model_stats in models.items():
+                if not isinstance(model_stats, dict):
+                    continue
+
+                # Add readable window start time
+                window_start = model_stats.get("window_start_ts")
+                if window_start:
+                    model_stats["window_started"] = self._format_timestamp_local(
+                        window_start
+                    )
+                elif "window_started" in model_stats:
+                    del model_stats["window_started"]
+
+                # Add readable reset time
+                quota_reset = model_stats.get("quota_reset_ts")
+                if quota_reset:
+                    model_stats["quota_resets"] = self._format_timestamp_local(
+                        quota_reset
+                    )
+                elif "quota_resets" in model_stats:
+                    del model_stats["quota_resets"]
+
+        return data
+
+    def _sort_sequential(
+        self,
+        candidates: List[Tuple[str, int]],
+        credential_priorities: Optional[Dict[str, int]] = None,
+    ) -> List[Tuple[str, int]]:
+        """
+        Sort credentials for sequential mode with position retention.
+
+        Credentials maintain their position based on established usage patterns,
+        ensuring that actively-used credentials remain primary until exhausted.
+
+        Sorting order (within each sort key, lower value = higher priority):
+        1. Priority tier (lower number = higher priority)
+        2. Usage count (higher = more established in rotation, maintains position)
+        3. Last used timestamp (higher = more recent, tiebreaker for stickiness)
+        4. Credential ID (alphabetical, stable ordering)
+
+        Args:
+            candidates: List of (credential_id, usage_count) tuples
+            credential_priorities: Optional dict mapping credentials to priority levels
+
+        Returns:
+            Sorted list of candidates (same format as input)
+        """
+        if not candidates:
+            return []
+
+        if len(candidates) == 1:
+            return candidates
+
+        def sort_key(item: Tuple[str, int]) -> Tuple[int, int, float, str]:
+            cred, usage_count = item
+            priority = (
+                credential_priorities.get(cred, 999) if credential_priorities else 999
+            )
+            last_used = (
+                self._usage_data.get(cred, {}).get("last_used_ts", 0)
+                if self._usage_data
+                else 0
+            )
+            return (
+                priority,  # ASC: lower priority number = higher priority
+                -usage_count,  # DESC: higher usage = more established
+                -last_used,  # DESC: more recent = preferred for ties
+                cred,  # ASC: stable alphabetical ordering
+            )
+
+        sorted_candidates = sorted(candidates, key=sort_key)
+
+        # Debug logging - show top 3 credentials in ordering
+        if lib_logger.isEnabledFor(logging.DEBUG):
+            order_info = [
+                f"{mask_credential(c)}(p={credential_priorities.get(c, 999) if credential_priorities else 'N/A'}, u={u})"
+                for c, u in sorted_candidates[:3]
+            ]
+            lib_logger.debug(f"Sequential ordering: {' → '.join(order_info)}")
+
+        return sorted_candidates
+
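The four-part sort key above can be demonstrated without the surrounding class. A standalone sketch (hypothetical function; `last_used` is passed as a plain dict instead of being read from `self._usage_data`):

```python
def sequential_order(candidates, priorities, last_used):
    """Order credentials for sequential mode: priority tier first, then
    higher usage (more established), then more recently used, then name."""
    def sort_key(item):
        cred, usage = item
        # Negate usage and recency so that "higher" sorts first under
        # Python's ascending sort.
        return (priorities.get(cred, 999), -usage, -last_used.get(cred, 0), cred)
    return sorted(candidates, key=sort_key)
```

Because the in-use credential keeps accumulating usage, it keeps winning the `-usage` comparison and stays first until it is cooled down, which is exactly the "position retention" the docstring describes.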
     async def _lazy_init(self):
         """Initializes the usage data by loading it from the file asynchronously."""
         async with self._init_lock:
 
         if self._usage_data is None:
             return
         async with self._data_lock:
+            # Add human-readable timestamp fields before saving
+            self._add_readable_timestamps(self._usage_data)
             async with aiofiles.open(self.file_path, "w") as f:
                 await f.write(json.dumps(self._usage_data, indent=2))
 
 
565
  async def _reset_daily_stats_if_needed(self):
566
+ """
567
+ Checks if usage stats need to be reset for any key.
568
+
569
+ Supports three reset modes:
570
+ 1. per_model: Each model has its own window, resets based on quota_reset_ts or fallback window
571
+ 2. credential: One window per credential (legacy with custom window duration)
572
+ 3. daily: Legacy daily reset at daily_reset_time_utc
573
+ """
574
+ if self._usage_data is None:
575
  return
576
 
577
  now_utc = datetime.now(timezone.utc)
578
+ now_ts = time.time()
579
  today_str = now_utc.date().isoformat()
580
  needs_saving = False
581
 
582
  for key, data in self._usage_data.items():
583
+ reset_config = self._get_usage_reset_config(key)
 
 
 
 
 
 
 
 
584
 
585
+ if reset_config:
586
+ reset_mode = reset_config.get("mode", "credential")
 
 
587
 
588
+ if reset_mode == "per_model":
589
+ # Per-model window reset
590
+ needs_saving |= await self._check_per_model_resets(
591
+ key, data, reset_config, now_ts
 
 
592
  )
593
+ else:
594
+ # Credential-level window reset (legacy)
595
+ needs_saving |= await self._check_window_reset(
596
+ key, data, reset_config, now_ts
597
+ )
598
+ elif self.daily_reset_time_utc:
599
+ # Legacy daily reset
600
+ needs_saving |= await self._check_daily_reset(
601
+ key, data, now_utc, today_str, now_ts
602
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
603
 
604
  if needs_saving:
605
  await self._save_usage()
606
 
+    async def _check_per_model_resets(
+        self,
+        key: str,
+        data: Dict[str, Any],
+        reset_config: Dict[str, Any],
+        now_ts: float,
+    ) -> bool:
+        """
+        Check and perform per-model resets for a credential.
+
+        Each model resets independently based on:
+        1. quota_reset_ts (authoritative, from quota exhausted error) if set
+        2. window_start_ts + window_seconds (fallback) otherwise
+
+        Grouped models reset together - all models in a group must be ready.
+
+        Args:
+            key: Credential identifier
+            data: Usage data for this credential
+            reset_config: Provider's reset configuration
+            now_ts: Current timestamp
+
+        Returns:
+            True if data was modified and needs saving
+        """
+        window_seconds = reset_config.get("window_seconds", 86400)
+        models_data = data.get("models", {})
+
+        if not models_data:
+            return False
+
+        modified = False
+        processed_groups = set()
+
+        for model, model_data in list(models_data.items()):
+            # Check if this model is in a quota group
+            group = self._get_model_quota_group(key, model)
+
+            if group:
+                if group in processed_groups:
+                    continue  # Already handled this group
+
+                # Check if entire group should reset
+                if self._should_group_reset(
+                    key, group, models_data, window_seconds, now_ts
+                ):
+                    # Archive and reset all models in group
+                    grouped_models = self._get_grouped_models(key, group)
+                    archived_count = 0
+
+                    for grouped_model in grouped_models:
+                        if grouped_model in models_data:
+                            gm_data = models_data[grouped_model]
+                            self._archive_model_to_global(data, grouped_model, gm_data)
+                            self._reset_model_data(gm_data)
+                            archived_count += 1
+
+                    if archived_count > 0:
+                        lib_logger.info(
+                            f"Reset model group '{group}' ({archived_count} models) for {mask_credential(key)}"
+                        )
+                        modified = True
+
+                processed_groups.add(group)
+
+            else:
+                # Ungrouped model - check individually
+                if self._should_model_reset(model_data, window_seconds, now_ts):
+                    self._archive_model_to_global(data, model, model_data)
+                    self._reset_model_data(model_data)
+                    lib_logger.info(f"Reset model {model} for {mask_credential(key)}")
+                    modified = True
+
+        # Preserve unexpired cooldowns
+        if modified:
+            self._preserve_unexpired_cooldowns(key, data, now_ts)
+            if "failures" in data:
+                data["failures"] = {}
+
+        return modified
+
688
+    def _should_model_reset(
+        self, model_data: Dict[str, Any], window_seconds: int, now_ts: float
+    ) -> bool:
+        """
+        Check if a single model should reset.
+
+        Returns True if:
+        - quota_reset_ts is set AND now >= quota_reset_ts, OR
+        - quota_reset_ts is NOT set AND now >= window_start_ts + window_seconds
+        """
+        quota_reset = model_data.get("quota_reset_ts")
+        window_start = model_data.get("window_start_ts")
+
+        if quota_reset:
+            return now_ts >= quota_reset
+        elif window_start:
+            return now_ts >= window_start + window_seconds
+        return False
+
707
+    def _should_group_reset(
+        self,
+        key: str,
+        group: str,
+        models_data: Dict[str, Dict],
+        window_seconds: int,
+        now_ts: float,
+    ) -> bool:
+        """
+        Check if all models in a group should reset.
+
+        All models in the group must be ready to reset.
+        If any model has an active cooldown/window, the whole group waits.
+        """
+        grouped_models = self._get_grouped_models(key, group)
+
+        # Track if any model in group has data
+        any_has_data = False
+
+        for grouped_model in grouped_models:
+            model_data = models_data.get(grouped_model, {})
+
+            if not model_data or (
+                model_data.get("window_start_ts") is None
+                and model_data.get("success_count", 0) == 0
+            ):
+                continue  # No stats for this model yet
+
+            any_has_data = True
+
+            if not self._should_model_reset(model_data, window_seconds, now_ts):
+                return False  # At least one model not ready
+
+        return any_has_data
+
+    def _archive_model_to_global(
+        self, data: Dict[str, Any], model: str, model_data: Dict[str, Any]
+    ) -> None:
+        """Archive a single model's stats to global."""
+        global_data = data.setdefault("global", {"models": {}})
+        global_model = global_data["models"].setdefault(
+            model,
+            {
+                "success_count": 0,
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "approx_cost": 0.0,
+            },
+        )
+
+        global_model["success_count"] += model_data.get("success_count", 0)
+        global_model["prompt_tokens"] += model_data.get("prompt_tokens", 0)
+        global_model["completion_tokens"] += model_data.get("completion_tokens", 0)
+        global_model["approx_cost"] += model_data.get("approx_cost", 0.0)
+
+    def _reset_model_data(self, model_data: Dict[str, Any]) -> None:
+        """Reset a model's window and stats."""
+        model_data["window_start_ts"] = None
+        model_data["quota_reset_ts"] = None
+        model_data["success_count"] = 0
+        model_data["prompt_tokens"] = 0
+        model_data["completion_tokens"] = 0
+        model_data["approx_cost"] = 0.0
+
771
+    async def _check_window_reset(
+        self,
+        key: str,
+        data: Dict[str, Any],
+        reset_config: Dict[str, Any],
+        now_ts: float,
+    ) -> bool:
+        """
+        Check and perform rolling window reset for a credential.
+
+        Args:
+            key: Credential identifier
+            data: Usage data for this credential
+            reset_config: Provider's reset configuration
+            now_ts: Current timestamp
+
+        Returns:
+            True if data was modified and needs saving
+        """
+        window_seconds = reset_config.get("window_seconds", 86400)  # Default 24h
+        field_name = reset_config.get("field_name", "window")
+        description = reset_config.get("description", "rolling window")
+
+        # Get current window data
+        window_data = data.get(field_name, {})
+        window_start = window_data.get("start_ts")
+
+        # No window started yet - nothing to reset
+        if window_start is None:
+            return False
+
+        # Check if window has expired
+        window_end = window_start + window_seconds
+        if now_ts < window_end:
+            # Window still active
+            return False
+
+        # Window expired - perform reset
+        hours_elapsed = (now_ts - window_start) / 3600
+        lib_logger.info(
+            f"Resetting {field_name} for {mask_credential(key)} - "
+            f"{description} expired after {hours_elapsed:.1f}h"
+        )
+
+        # Archive to global
+        self._archive_to_global(data, window_data)
+
+        # Preserve unexpired cooldowns
+        self._preserve_unexpired_cooldowns(key, data, now_ts)
+
+        # Reset window stats (but don't start new window until first request)
+        data[field_name] = {"start_ts": None, "models": {}}
+
+        # Reset consecutive failures
+        if "failures" in data:
+            data["failures"] = {}
+
+        return True
+
830
+    async def _check_daily_reset(
+        self,
+        key: str,
+        data: Dict[str, Any],
+        now_utc: datetime,
+        today_str: str,
+        now_ts: float,
+    ) -> bool:
+        """
+        Check and perform legacy daily reset for a credential.
+
+        Args:
+            key: Credential identifier
+            data: Usage data for this credential
+            now_utc: Current datetime in UTC
+            today_str: Today's date as ISO string
+            now_ts: Current timestamp
+
+        Returns:
+            True if data was modified and needs saving
+        """
+        last_reset_str = data.get("last_daily_reset", "")
+
+        if last_reset_str == today_str:
+            return False
+
+        last_reset_dt = None
+        if last_reset_str:
+            try:
+                last_reset_dt = datetime.fromisoformat(last_reset_str).replace(
+                    tzinfo=timezone.utc
+                )
+            except ValueError:
+                pass
+
+        # Determine the reset threshold for today
+        reset_threshold_today = datetime.combine(
+            now_utc.date(), self.daily_reset_time_utc
+        )
+
+        if not (
+            last_reset_dt is None or last_reset_dt < reset_threshold_today <= now_utc
+        ):
+            return False
+
+        lib_logger.debug(f"Performing daily reset for key {mask_credential(key)}")
+
+        # Preserve unexpired cooldowns
+        self._preserve_unexpired_cooldowns(key, data, now_ts)
+
+        # Reset consecutive failures
+        if "failures" in data:
+            data["failures"] = {}
+
+        # Archive daily stats to global
+        daily_data = data.get("daily", {})
+        if daily_data:
+            self._archive_to_global(data, daily_data)
+
+        # Reset daily stats
+        data["daily"] = {"date": today_str, "models": {}}
+        data["last_daily_reset"] = today_str
+
+        return True
+
895
+ def _archive_to_global(
896
+ self, data: Dict[str, Any], source_data: Dict[str, Any]
897
+ ) -> None:
898
+ """
899
+ Archive usage stats from a source field (daily/window) to global.
900
+
901
+ Args:
902
+ data: The credential's usage data
903
+ source_data: The source field data to archive (has "models" key)
904
+ """
905
+ global_data = data.setdefault("global", {"models": {}})
906
+ for model, stats in source_data.get("models", {}).items():
907
+ global_model_stats = global_data["models"].setdefault(
908
+ model,
909
+ {
910
+ "success_count": 0,
911
+ "prompt_tokens": 0,
912
+ "completion_tokens": 0,
913
+ "approx_cost": 0.0,
914
+ },
915
+ )
916
+ global_model_stats["success_count"] += stats.get("success_count", 0)
917
+ global_model_stats["prompt_tokens"] += stats.get("prompt_tokens", 0)
918
+ global_model_stats["completion_tokens"] += stats.get("completion_tokens", 0)
919
+ global_model_stats["approx_cost"] += stats.get("approx_cost", 0.0)
920
+
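The archive step is a plain additive merge into the lifetime "global" bucket before windowed counters are zeroed. A standalone sketch of that merge (illustrative free function, not the module's method; `data` layout follows the diff):

```python
def archive_to_global(data, source_data):
    """Accumulate per-model stats from a window/daily field into
    data["global"]["models"], creating zeroed entries on first sight."""
    global_models = data.setdefault("global", {"models": {}})["models"]
    for model, stats in source_data.get("models", {}).items():
        dest = global_models.setdefault(
            model,
            {"success_count": 0, "prompt_tokens": 0,
             "completion_tokens": 0, "approx_cost": 0.0},
        )
        # Missing fields in the source default to 0, so partial records merge safely.
        for field in dest:
            dest[field] += stats.get(field, 0)
    return data


data = {"global": {"models": {"m": {"success_count": 1, "prompt_tokens": 10,
                                    "completion_tokens": 5, "approx_cost": 0.01}}}}
archive_to_global(data, {"models": {"m": {"success_count": 2, "prompt_tokens": 30,
                                          "completion_tokens": 5, "approx_cost": 0.02}}})
g = data["global"]["models"]["m"]
assert g["success_count"] == 3 and g["prompt_tokens"] == 40
```

Because the merge only adds, calling it once per expired window keeps "global" monotonically growing while the windowed field can be reset to zeros afterwards.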
921
+    def _preserve_unexpired_cooldowns(
+        self, key: str, data: Dict[str, Any], now_ts: float
+    ) -> None:
+        """
+        Preserve unexpired cooldowns during reset (important for long quota cooldowns).
+
+        Args:
+            key: Credential identifier (for logging)
+            data: The credential's usage data
+            now_ts: Current timestamp
+        """
+        # Preserve unexpired model cooldowns
+        if "model_cooldowns" in data:
+            active_cooldowns = {
+                model: end_time
+                for model, end_time in data["model_cooldowns"].items()
+                if end_time > now_ts
+            }
+            if active_cooldowns:
+                max_remaining = max(
+                    end_time - now_ts for end_time in active_cooldowns.values()
+                )
+                hours_remaining = max_remaining / 3600
+                lib_logger.info(
+                    f"Preserving {len(active_cooldowns)} active cooldown(s) "
+                    f"for key {mask_credential(key)} during reset "
+                    f"(longest: {hours_remaining:.1f}h remaining)"
+                )
+                data["model_cooldowns"] = active_cooldowns
+            else:
+                data["model_cooldowns"] = {}
+
+        # Preserve unexpired key-level cooldown
+        if data.get("key_cooldown_until"):
+            if data["key_cooldown_until"] <= now_ts:
+                data["key_cooldown_until"] = None
+            else:
+                hours_remaining = (data["key_cooldown_until"] - now_ts) / 3600
+                lib_logger.info(
+                    f"Preserving key-level cooldown for {mask_credential(key)} "
+                    f"during reset ({hours_remaining:.1f}h remaining)"
+                )
+        else:
+            data["key_cooldown_until"] = None
+
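The filtering behavior above (expired cooldowns dropped, active ones carried across a stats reset) can be sketched without the logging and class context. This is an illustrative simplification, not the module's method:

```python
import time


def preserve_unexpired_cooldowns(data, now_ts):
    """Keep only cooldown entries that end in the future; clear an
    expired or absent key-level cooldown to None."""
    # Drop per-model cooldowns that have already expired; keep active ones.
    data["model_cooldowns"] = {
        m: end for m, end in data.get("model_cooldowns", {}).items() if end > now_ts
    }
    # Keep a key-level cooldown only while it is still in the future.
    until = data.get("key_cooldown_until")
    data["key_cooldown_until"] = until if until and until > now_ts else None
    return data


now = time.time()
d = {"model_cooldowns": {"a": now + 60.0, "b": now - 60.0},
     "key_cooldown_until": now - 1.0}
preserve_unexpired_cooldowns(d, now)
assert list(d["model_cooldowns"]) == ["a"] and d["key_cooldown_until"] is None
```

This is why a multi-hour quota cooldown survives a daily or window reset: the reset wipes stats, but any cooldown whose end timestamp is still in the future is carried over unchanged.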
     def _initialize_key_states(self, keys: List[str]):
         """Initializes state tracking for all provided keys if not already present."""
         for key in keys:

                 priority = credential_priorities.get(key, 999)

                 # Get usage count for load balancing within priority groups
+                # Uses grouped usage if model is in a quota group
+                usage_count = self._get_grouped_usage_count(key, model)

                 # Group by priority
                 if priority not in priority_groups:
 
             for priority_level in sorted_priorities:
                 keys_in_priority = priority_groups[priority_level]

+                # Determine selection method based on provider's rotation mode
+                provider = model.split("/")[0] if "/" in model else ""
+                rotation_mode = self._get_rotation_mode(provider)
+
+                # Calculate effective concurrency based on priority tier
+                multiplier = self._get_priority_multiplier(
+                    provider, priority_level, rotation_mode
+                )
+                effective_max_concurrent = max_concurrent * multiplier
+
                 # Within each priority group, use existing tier1/tier2 logic
                 tier1_keys, tier2_keys = [], []
                 for key, usage_count in keys_in_priority:
 
                     if not key_state["models_in_use"]:
                         tier1_keys.append((key, usage_count))
                     # Tier 2: Keys that can accept more concurrent requests
+                    elif (
+                        key_state["models_in_use"].get(model, 0)
+                        < effective_max_concurrent
+                    ):
                         tier2_keys.append((key, usage_count))
 
+                if rotation_mode == "sequential":
+                    # Sequential mode: sort credentials by priority, usage, recency
+                    # Keep all candidates in sorted order (no filtering to single key)
+                    selection_method = "sequential"
+                    if tier1_keys:
+                        tier1_keys = self._sort_sequential(
+                            tier1_keys, credential_priorities
+                        )
+                    if tier2_keys:
+                        tier2_keys = self._sort_sequential(
+                            tier2_keys, credential_priorities
+                        )
+                elif self.rotation_tolerance > 0:
+                    # Balanced mode with weighted randomness
+                    selection_method = "weighted-random"
                     if tier1_keys:
                         selected_key = self._select_weighted_random(
                             tier1_keys, self.rotation_tolerance
 
                     ]
                 else:
                     # Deterministic: sort by usage within each tier
+                    selection_method = "least-used"
                     tier1_keys.sort(key=lambda x: x[1])
                     tier2_keys.sort(key=lambda x: x[1])
 
 
                     state = self.key_states[key]
                     async with state["lock"]:
                         current_count = state["models_in_use"].get(model, 0)
+                        if current_count < effective_max_concurrent:
                             state["models_in_use"][model] = current_count + 1
                             tier_name = (
                                 credential_tier_names.get(key, "unknown")

                             )
                             lib_logger.info(
                                 f"Acquired key {mask_credential(key)} for model {model} "
+                                f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
                             )
                             return key
 
 
             else:
                 # Original logic when no priorities specified
+
+                # Determine selection method based on provider's rotation mode
+                provider = model.split("/")[0] if "/" in model else ""
+                rotation_mode = self._get_rotation_mode(provider)
+
+                # Calculate effective concurrency for default priority (999)
+                # When no priorities are specified, all credentials get default priority
+                default_priority = 999
+                multiplier = self._get_priority_multiplier(
+                    provider, default_priority, rotation_mode
+                )
+                effective_max_concurrent = max_concurrent * multiplier
+
                 tier1_keys, tier2_keys = [], []

                 # First, filter the list of available keys to exclude any on cooldown.
 
                         continue

                     # Prioritize keys based on their current usage to ensure load balancing.
+                    # Uses grouped usage if model is in a quota group
+                    usage_count = self._get_grouped_usage_count(key, model)
                     key_state = self.key_states[key]

                     # Tier 1: Completely idle keys (preferred).
                     if not key_state["models_in_use"]:
                         tier1_keys.append((key, usage_count))
                     # Tier 2: Keys that can accept more concurrent requests for this model.
+                    elif (
+                        key_state["models_in_use"].get(model, 0)
+                        < effective_max_concurrent
+                    ):
                         tier2_keys.append((key, usage_count))
 
+                if rotation_mode == "sequential":
+                    # Sequential mode: sort credentials by priority, usage, recency
+                    # Keep all candidates in sorted order (no filtering to single key)
+                    selection_method = "sequential"
+                    if tier1_keys:
+                        tier1_keys = self._sort_sequential(
+                            tier1_keys, credential_priorities
+                        )
+                    if tier2_keys:
+                        tier2_keys = self._sort_sequential(
+                            tier2_keys, credential_priorities
+                        )
+                elif self.rotation_tolerance > 0:
+                    # Balanced mode with weighted randomness
+                    selection_method = "weighted-random"
                     if tier1_keys:
                         selected_key = self._select_weighted_random(
                             tier1_keys, self.rotation_tolerance
 
                     ]
                 else:
                     # Deterministic: sort by usage within each tier
+                    selection_method = "least-used"
                     tier1_keys.sort(key=lambda x: x[1])
                     tier2_keys.sort(key=lambda x: x[1])
 
 
                     state = self.key_states[key]
                     async with state["lock"]:
                         current_count = state["models_in_use"].get(model, 0)
+                        if current_count < effective_max_concurrent:
                             state["models_in_use"][model] = current_count + 1
                             tier_name = (
                                 credential_tier_names.get(key)

                             tier_info = f"tier: {tier_name}, " if tier_name else ""
                             lib_logger.info(
                                 f"Acquired key {mask_credential(key)} for model {model} "
+                                f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
                             )
                             return key
 
 
         """
         Records a successful API call, resetting failure counters.
         It safely handles cases where token usage data is not available.
+
+        Supports two modes based on provider configuration:
+        - per_model: Each model has its own window_start_ts and stats in key_data["models"]
+        - credential: Legacy mode with key_data["daily"]["models"]
         """
         await self._lazy_init()
         async with self._data_lock:
+            now_ts = time.time()
             today_utc_str = datetime.now(timezone.utc).date().isoformat()
+
+            reset_config = self._get_usage_reset_config(key)
+            reset_mode = (
+                reset_config.get("mode", "credential") if reset_config else "credential"
             )
 
+            if reset_mode == "per_model":
+                # New per-model structure
+                key_data = self._usage_data.setdefault(
+                    key,
+                    {
+                        "models": {},
+                        "global": {"models": {}},
+                        "model_cooldowns": {},
+                        "failures": {},
+                    },
+                )
+
+                # Ensure models dict exists
+                if "models" not in key_data:
+                    key_data["models"] = {}
+
+                # Get or create per-model data with window tracking
+                model_data = key_data["models"].setdefault(
+                    model,
+                    {
+                        "window_start_ts": None,
+                        "quota_reset_ts": None,
+                        "success_count": 0,
+                        "prompt_tokens": 0,
+                        "completion_tokens": 0,
+                        "approx_cost": 0.0,
+                    },
+                )
+
+                # Start window on first request for this model
+                if model_data.get("window_start_ts") is None:
+                    model_data["window_start_ts"] = now_ts
+
+                    # Set expected quota reset time from provider config
+                    window_seconds = (
+                        reset_config.get("window_seconds", 0) if reset_config else 0
+                    )
+                    if window_seconds > 0:
+                        model_data["quota_reset_ts"] = now_ts + window_seconds
+
+                    window_hours = window_seconds / 3600 if window_seconds else 0
+                    lib_logger.info(
+                        f"Started {window_hours:.1f}h window for model {model} on {mask_credential(key)}"
+                    )
+
+                # Record stats
+                model_data["success_count"] += 1
+                usage_data_ref = model_data  # For token/cost recording below
+
+            else:
+                # Legacy credential-level structure
+                key_data = self._usage_data.setdefault(
+                    key,
+                    {
+                        "daily": {"date": today_utc_str, "models": {}},
+                        "global": {"models": {}},
+                        "model_cooldowns": {},
+                        "failures": {},
+                    },
+                )
+
+                if "last_daily_reset" not in key_data:
+                    key_data["last_daily_reset"] = today_utc_str
+
+                # Get or create model data in daily structure
+                usage_data_ref = key_data["daily"]["models"].setdefault(
+                    model,
+                    {
+                        "success_count": 0,
+                        "prompt_tokens": 0,
+                        "completion_tokens": 0,
+                        "approx_cost": 0.0,
+                    },
+                )
+                usage_data_ref["success_count"] += 1
 
+            # Reset failures for this model
             model_failures = key_data.setdefault("failures", {}).setdefault(model, {})
             model_failures["consecutive_failures"] = 0
+
+            # Clear transient cooldown on success (but NOT quota_reset_ts)
             if model in key_data.get("model_cooldowns", {}):
                 del key_data["model_cooldowns"][model]

+            # Record token and cost usage
             if (
                 completion_response
                 and hasattr(completion_response, "usage")
                 and completion_response.usage
             ):
                 usage = completion_response.usage
+                usage_data_ref["prompt_tokens"] += usage.prompt_tokens
+                usage_data_ref["completion_tokens"] += getattr(
                     usage, "completion_tokens", 0
+                )
                 lib_logger.info(
                     f"Recorded usage from response object for key {mask_credential(key)}"
                 )
                 try:
                     provider_name = model.split("/")[0]
+                    provider_instance = self._get_provider_instance(provider_name)
+
+                    if provider_instance and getattr(
+                        provider_instance, "skip_cost_calculation", False
                     ):
                         lib_logger.debug(
                             f"Skipping cost calculation for provider '{provider_name}' (custom provider)."
                         )
                     else:
                         if isinstance(completion_response, litellm.EmbeddingResponse):
                             model_info = litellm.get_model_info(model)
                             input_cost = model_info.get("input_cost_per_token")
                             if input_cost:

                         )

                         if cost is not None:
+                            usage_data_ref["approx_cost"] += cost
                 except Exception as e:
                     lib_logger.warning(
                         f"Could not calculate cost for model {model}: {e}"

             elif isinstance(completion_response, asyncio.Future) or hasattr(
                 completion_response, "__aiter__"
             ):
+                pass  # Stream - usage recorded from chunks

             else:
                 lib_logger.warning(
                     f"No usage data found in completion response for model {model}. Recording success without token count."
                 )

+            key_data["last_used_ts"] = now_ts

             await self._save_usage()
 
 
         classified_error: ClassifiedError,
         increment_consecutive_failures: bool = True,
     ):
+        """Records a failure and applies cooldowns based on error type.
+
+        Distinguishes between:
+        - quota_exceeded: Long cooldown with exact reset time (from quota_reset_timestamp).
+          Sets quota_reset_ts on model (and group) - this becomes the authoritative stats reset time.
+        - rate_limit: Short transient cooldown (just wait and retry).
+          Only sets model_cooldowns - does NOT affect stats reset timing.

         Args:
             key: The API key or credential identifier

         """
         await self._lazy_init()
         async with self._data_lock:
+            now_ts = time.time()
             today_utc_str = datetime.now(timezone.utc).date().isoformat()
+
+            reset_config = self._get_usage_reset_config(key)
+            reset_mode = (
+                reset_config.get("mode", "credential") if reset_config else "credential"
             )

+            # Initialize key data with appropriate structure
+            if reset_mode == "per_model":
+                key_data = self._usage_data.setdefault(
+                    key,
+                    {
+                        "models": {},
+                        "global": {"models": {}},
+                        "model_cooldowns": {},
+                        "failures": {},
+                    },
+                )
+            else:
+                key_data = self._usage_data.setdefault(
+                    key,
+                    {
+                        "daily": {"date": today_utc_str, "models": {}},
+                        "global": {"models": {}},
+                        "model_cooldowns": {},
+                        "failures": {},
+                    },
+                )
+
             # Provider-level errors (transient issues) should not count against the key
             provider_level_errors = {"server_error", "api_connection"}

             # Calculate cooldown duration based on error type
             cooldown_seconds = None
+            model_cooldowns = key_data.setdefault("model_cooldowns", {})
+
+            if classified_error.error_type == "quota_exceeded":
+                # Quota exhausted - use authoritative reset timestamp if available
+                quota_reset_ts = classified_error.quota_reset_timestamp
+                cooldown_seconds = classified_error.retry_after or 60
+
+                if quota_reset_ts and reset_mode == "per_model":
+                    # Set quota_reset_ts on model - this becomes authoritative stats reset time
+                    models_data = key_data.setdefault("models", {})
+                    model_data = models_data.setdefault(
+                        model,
+                        {
+                            "window_start_ts": None,
+                            "quota_reset_ts": None,
+                            "success_count": 0,
+                            "prompt_tokens": 0,
+                            "completion_tokens": 0,
+                            "approx_cost": 0.0,
+                        },
+                    )
+                    model_data["quota_reset_ts"] = quota_reset_ts
+
+                    # Apply to all models in the same quota group
+                    group = self._get_model_quota_group(key, model)
+                    if group:
+                        grouped_models = self._get_grouped_models(key, group)
+                        for grouped_model in grouped_models:
+                            group_model_data = models_data.setdefault(
+                                grouped_model,
+                                {
+                                    "window_start_ts": None,
+                                    "quota_reset_ts": None,
+                                    "success_count": 0,
+                                    "prompt_tokens": 0,
+                                    "completion_tokens": 0,
+                                    "approx_cost": 0.0,
+                                },
+                            )
+                            group_model_data["quota_reset_ts"] = quota_reset_ts
+                            # Also set transient cooldown for selection logic
+                            model_cooldowns[grouped_model] = quota_reset_ts

+                        reset_dt = datetime.fromtimestamp(
+                            quota_reset_ts, tz=timezone.utc
+                        )
+                        lib_logger.info(
+                            f"Quota exhausted for group '{group}' ({len(grouped_models)} models) "
+                            f"on {mask_credential(key)}. Resets at {reset_dt.isoformat()}"
+                        )
+                    else:
+                        reset_dt = datetime.fromtimestamp(
+                            quota_reset_ts, tz=timezone.utc
+                        )
+                        hours = (quota_reset_ts - now_ts) / 3600
+                        lib_logger.info(
+                            f"Quota exhausted for model {model} on {mask_credential(key)}. "
+                            f"Resets at {reset_dt.isoformat()} ({hours:.1f}h)"
+                        )
+
+                    # Set transient cooldown for selection logic
+                    model_cooldowns[model] = quota_reset_ts
+                else:
+                    # No authoritative timestamp or legacy mode - just use retry_after
+                    model_cooldowns[model] = now_ts + cooldown_seconds
+                    hours = cooldown_seconds / 3600
+                    lib_logger.info(
+                        f"Quota exhausted on {mask_credential(key)} for model {model}. "
+                        f"Cooldown: {cooldown_seconds}s ({hours:.1f}h)"
+                    )
+
+            elif classified_error.error_type == "rate_limit":
+                # Transient rate limit - just set short cooldown (does NOT set quota_reset_ts)
+                cooldown_seconds = classified_error.retry_after or 60
+                model_cooldowns[model] = now_ts + cooldown_seconds
+                lib_logger.info(
+                    f"Rate limit on {mask_credential(key)} for model {model}. "
+                    f"Transient cooldown: {cooldown_seconds}s"
+                )
+
             elif classified_error.error_type == "authentication":
                 # Apply a 5-minute key-level lockout for auth errors
+                key_data["key_cooldown_until"] = now_ts + 300
+                cooldown_seconds = 300
+                model_cooldowns[model] = now_ts + cooldown_seconds
                 lib_logger.warning(
                     f"Authentication error on key {mask_credential(key)}. Applying 5-minute key-level lockout."
                 )

             # If we should increment failures, calculate escalating backoff
             if should_increment:

                 # If cooldown wasn't set by specific error type, use escalating backoff
                 if cooldown_seconds is None:
                     backoff_tiers = {1: 10, 2: 30, 3: 60, 4: 120}
+                    cooldown_seconds = backoff_tiers.get(count, 7200)
+                    model_cooldowns[model] = now_ts + cooldown_seconds
                 lib_logger.warning(
                     f"Failure #{count} for key {mask_credential(key)} with model {model}. "
+                    f"Error type: {classified_error.error_type}, cooldown: {cooldown_seconds}s"
                 )
             else:
                 # Provider-level errors: apply short cooldown but don't count against key
                 if cooldown_seconds is None:
+                    cooldown_seconds = 30
+                    model_cooldowns[model] = now_ts + cooldown_seconds
                 lib_logger.info(
+                    f"Provider-level error ({classified_error.error_type}) for key {mask_credential(key)} "
+                    f"with model {model}. NOT incrementing failures. Cooldown: {cooldown_seconds}s"
                 )

             # Check for key-level lockout condition
             await self._check_key_lockout(key, key_data)

             key_data["last_failure"] = {
+                "timestamp": now_ts,
                 "model": model,
                 "error": str(classified_error.original_exception),
             }