Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Dec 7, 2025

Commit

3c52746

1 Parent(s): fd01482

refactor(providers): 🔨 centralize tier and quota configuration in ProviderInterface

Consolidate provider-specific tier prioritization, usage reset configuration, and quota group logic into the base ProviderInterface class to eliminate code duplication and establish a single source of truth.

- Introduce UsageResetConfigDef dataclass for declarative usage configuration
- Add tier_priorities, usage_reset_configs, and model_quota_groups as class attributes
- Implement centralized _resolve_tier_priority() and _build_usage_reset_config() methods
- Move get_credential_priority() and get_usage_reset_config() logic to base class
- Add environment variable override support for quota groups (QUOTA_GROUPS_{PROVIDER}_{GROUP})
- Remove duplicate priority/usage logic from AntigravityProvider and GeminiCliProvider
- Update .env.example with comprehensive documentation for quota group configuration

This refactoring allows providers to define their tier system, usage windows, and quota groups purely through class attributes, while the base class handles all resolution logic. Providers now only need to override get_credential_tier_name() for tier lookup.

Files changed (4) hide show

.env.example +20 -0
src/rotator_library/providers/antigravity_provider.py +47 -137
src/rotator_library/providers/gemini_cli_provider.py +36 -34
src/rotator_library/providers/provider_interface.py +233 -37

.env.example CHANGED Viewed

@@ -185,6 +185,26 @@ MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
 # ROTATION_MODE_GEMINI=balanced
 # ROTATION_MODE_ANTIGRAVITY=sequential
 # ------------------------------------------------------------------------------
 # | [ADVANCED] Proxy Configuration                                             |
 # ------------------------------------------------------------------------------

 # ROTATION_MODE_GEMINI=balanced
 # ROTATION_MODE_ANTIGRAVITY=sequential
+# --- Model Quota Groups ---
+# Models that share quota/cooldown timing. When one model in a group hits
+# quota exhausted (429), all models in the group receive the same cooldown timestamp.
+# They also reset (archive stats) together when the quota period expires.
+#
+# This is useful for providers where multiple model variants share the same
+# underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
+#
+# Format: QUOTA_GROUPS_<PROVIDER>_<GROUP>="model1,model2,model3"
+#
+# To DISABLE a default group, set it to empty string:
+#   QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
+#
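+# Default groups (declared in each provider's model_quota_groups attribute):
+#   ANTIGRAVITY.CLAUDE: claude-sonnet-4-5,claude-opus-4-5
+#   (this entry is commented out in AntigravityProvider.model_quota_groups in
+#   this commit, so no group is active by default)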
+#
+# Examples:
+# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
+# QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
 # ------------------------------------------------------------------------------
 # | [ADVANCED] Proxy Configuration                                             |
 # ------------------------------------------------------------------------------

src/rotator_library/providers/antigravity_provider.py CHANGED Viewed

@@ -34,7 +34,7 @@ from urllib.parse import urlparse
 import httpx
 import litellm
-from .provider_interface import ProviderInterface
 from .antigravity_auth_base import AntigravityAuthBase
 from .provider_cache import ProviderCache
 from ..model_definitions import ModelDefinitions
@@ -497,6 +497,52 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
     # Sequential mode by default - preserves thinking signature caches between requests
     default_rotation_mode: str = "sequential"
     @staticmethod
     def parse_quota_error(
         error: Exception, error_body: Optional[str] = None
@@ -733,43 +779,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
             f"claude_fix={self._enable_claude_tool_fix}, thinking_sanitization={self._enable_thinking_sanitization}"
         )
-    # =========================================================================
-    # CREDENTIAL PRIORITIZATION
-    # =========================================================================
-    def get_credential_priority(self, credential: str) -> Optional[int]:
-        """
-        Returns priority based on Antigravity tier.
-        Paid tiers: priority 1 (highest)
-        Free tier: priority 2
-        Legacy/Unknown: priority 10 (lowest)
-        Args:
-            credential: The credential path
-        Returns:
-            Priority level (1-10) or None if tier not yet discovered
-        """
-        tier = self.project_tier_cache.get(credential)
-        # Lazy load from file if not in cache
-        if not tier:
-            tier = self._load_tier_from_file(credential)
-        if not tier:
-            return None  # Not yet discovered
-        # Paid tiers get highest priority
-        if tier not in ["free-tier", "legacy-tier", "unknown"]:
-            return 1
-        # Free tier gets lower priority
-        if tier == "free-tier":
-            return 2
-        # Legacy and unknown get even lower
-        return 10
     def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
         """
         Load tier from credential file's _proxy_metadata and cache it.
@@ -837,105 +846,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
         """
         return None
-    def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
-        """
-        Get Antigravity-specific usage tracking configuration based on credential tier.
-        Antigravity uses per-model windows with different durations by tier:
-        - Paid tiers (priority 1): 5-hour per-model window
-        - Free tier (priority 2): 7-day per-model window
-        - Unknown/legacy: 7-day per-model window (conservative default)
-        When a model hits a quota_exhausted 429 error with exact reset timestamp,
-        that timestamp becomes the authoritative reset time for the model (and its group).
-        Args:
-            credential: The credential path
-        Returns:
-            Usage reset configuration dict with mode="per_model"
-        """
-        tier = self.project_tier_cache.get(credential)
-        if not tier:
-            tier = self._load_tier_from_file(credential)
-        # Paid tiers: 5-hour per-model window
-        if tier and tier not in ["free-tier", "legacy-tier", "unknown"]:
-            return {
-                "window_seconds": 5 * 60 * 60,  # 18000 seconds = 5 hours
-                "mode": "per_model",
-                "priority": 1,
-                "description": "5-hour per-model window (paid tier)",
-            }
-        # Free tier: 7-day per-model window
-        if tier == "free-tier":
-            return {
-                "window_seconds": 7 * 24 * 60 * 60,  # 604800 seconds = 7 days
-                "mode": "per_model",
-                "priority": 2,
-                "description": "7-day per-model window (free tier)",
-            }
-        # Unknown/legacy: use 7-day per-model window as conservative default
-        return {
-            "window_seconds": 7 * 24 * 60 * 60,  # 604800 seconds = 7 days
-            "mode": "per_model",
-            "priority": 10,
-            "description": "7-day per-model window (unknown tier - conservative default)",
-        }
-    def get_default_usage_field_name(self) -> str:
-        """
-        Get the default usage tracking field name for Antigravity.
-        Returns:
-            "models" for per-model tracking
-        """
-        return "models"
-    # =========================================================================
-    # Model Quota Grouping
-    # =========================================================================
-    # Models that share quota timing - when one hits quota, all get same reset time
-    QUOTA_GROUPS = {
-        # Future: add claude/gemini groups if they share quota
-    }
-    def get_model_quota_group(self, model: str) -> Optional[str]:
-        """
-        Returns the quota group name for a model.
-        Claude models (sonnet and opus) share quota on Antigravity.
-        When one hits quota exhausted, all models in the group get the same reset time.
-        Args:
-            model: Model name (with or without "antigravity/" prefix)
-        Returns:
-            Group name ("claude") or None if not grouped
-        """
-        # Remove provider prefix if present
-        clean_model = model.replace("antigravity/", "")
-        for group_name, models in self.QUOTA_GROUPS.items():
-            if clean_model in models:
-                return group_name
-        return None
-    def get_models_in_quota_group(self, group: str) -> List[str]:
-        """
-        Returns all model names in a quota group.
-        Args:
-            group: Group name (e.g., "claude")
-        Returns:
-            List of model names (without provider prefix)
-        """
-        return self.QUOTA_GROUPS.get(group, [])
     async def initialize_credentials(self, credential_paths: List[str]) -> None:
         """
         Load persisted tier information from credential files at startup.

 import httpx
 import litellm
+from .provider_interface import ProviderInterface, UsageResetConfigDef, QuotaGroupMap
 from .antigravity_auth_base import AntigravityAuthBase
 from .provider_cache import ProviderCache
 from ..model_definitions import ModelDefinitions
     # Sequential mode by default - preserves thinking signature caches between requests
     default_rotation_mode: str = "sequential"
+    # =========================================================================
+    # TIER & USAGE CONFIGURATION
+    # =========================================================================
+    # Provider name for env var lookups (QUOTA_GROUPS_ANTIGRAVITY_*)
+    provider_env_name: str = "antigravity"
+    # Tier name -> priority mapping (Single Source of Truth)
+    # Lower numbers = higher priority
+    tier_priorities = {
+        # Priority 1: Highest paid tier (Google AI Ultra - name unconfirmed)
+        # "google-ai-ultra": 1,  # Uncomment when tier name is confirmed
+        # Priority 2: Standard paid tier
+        "standard-tier": 2,
+        # Priority 3: Free tier
+        "free-tier": 3,
+        # Priority 10: Legacy/Unknown (lowest)
+        "legacy-tier": 10,
+        "unknown": 10,
+    }
+    # Default priority for tiers not in the mapping
+    default_tier_priority: int = 10
+    # Usage reset configs keyed by priority sets
+    # Priorities 1-2 (paid tiers) get 5h window, others get 7d window
+    usage_reset_configs = {
+        frozenset({1, 2}): UsageResetConfigDef(
+            window_seconds=5 * 60 * 60,  # 5 hours
+            mode="per_model",
+            description="5-hour per-model window (paid tier)",
+            field_name="models",
+        ),
+        "default": UsageResetConfigDef(
+            window_seconds=7 * 24 * 60 * 60,  # 7 days
+            mode="per_model",
+            description="7-day per-model window (free/unknown tier)",
+            field_name="models",
+        ),
+    }
+    # Model quota groups (can be overridden via QUOTA_GROUPS_ANTIGRAVITY_CLAUDE)
+    model_quota_groups: QuotaGroupMap = {
+        # "claude": ["claude-sonnet-4-5", "claude-opus-4-5"],
+    }
     @staticmethod
     def parse_quota_error(
         error: Exception, error_body: Optional[str] = None
             f"claude_fix={self._enable_claude_tool_fix}, thinking_sanitization={self._enable_thinking_sanitization}"
         )
     def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
         """
         Load tier from credential file's _proxy_metadata and cache it.
         """
         return None
     async def initialize_credentials(self, credential_paths: List[str]) -> None:
         """
         Load persisted tier information from credential files at startup.

src/rotator_library/providers/gemini_cli_provider.py CHANGED Viewed

@@ -189,6 +189,36 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
     # Balanced by default - Gemini CLI has short cooldowns (seconds, not hours)
     default_rotation_mode: str = "balanced"
     @staticmethod
     def parse_quota_error(
         error: Exception, error_body: Optional[str] = None
@@ -264,41 +294,13 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
         )
     # =========================================================================
-    # CREDENTIAL PRIORITIZATION
     # =========================================================================
-    def get_credential_priority(self, credential: str) -> Optional[int]:
-        """
-        Returns priority based on Gemini tier.
-        Paid tiers: priority 1 (highest)
-        Free/Legacy tiers: priority 2
-        Unknown: priority 10 (lowest)
-        Args:
-            credential: The credential path
-        Returns:
-            Priority level (1-10) or None if tier not yet discovered
-        """
-        tier = self.project_tier_cache.get(credential)
-        # Lazy load from file if not in cache
-        if not tier:
-            tier = self._load_tier_from_file(credential)
-        if not tier:
-            return None  # Not yet discovered
-        # Paid tiers get highest priority
-        if tier not in ["free-tier", "legacy-tier", "unknown"]:
-            return 1
-        # Free tier gets lower priority
-        if tier == "free-tier":
-            return 2
-        # Legacy and unknown get even lower
-        return 10
     def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
         """

     # Balanced by default - Gemini CLI has short cooldowns (seconds, not hours)
     default_rotation_mode: str = "balanced"
+    # =========================================================================
+    # TIER CONFIGURATION
+    # =========================================================================
+    # Provider name for env var lookups (QUOTA_GROUPS_GEMINI_CLI_*)
+    provider_env_name: str = "gemini_cli"
+    # Tier name -> priority mapping (Single Source of Truth)
+    # Same tier names as Antigravity (coincidentally), but defined separately
+    tier_priorities = {
+        # Priority 1: Highest paid tier (Google AI Ultra - name unconfirmed)
+        # "google-ai-ultra": 1,  # Uncomment when tier name is confirmed
+        # Priority 2: Standard paid tier
+        "standard-tier": 2,
+        # Priority 3: Free tier
+        "free-tier": 3,
+        # Priority 10: Legacy/Unknown (lowest)
+        "legacy-tier": 10,
+        "unknown": 10,
+    }
+    # Default priority for tiers not in the mapping
+    default_tier_priority: int = 10
+    # Gemini CLI uses default daily reset - no custom usage_reset_configs
+    # (Empty dict means inherited get_usage_reset_config returns None)
+    # No quota groups defined for Gemini CLI
+    # (Models don't share quotas)
     @staticmethod
     def parse_quota_error(
         error: Exception, error_body: Optional[str] = None
         )
     # =========================================================================
+    # CREDENTIAL TIER LOOKUP (Provider-specific - uses cache)
+    # =========================================================================
+    #
+    # NOTE: get_credential_priority() is now inherited from ProviderInterface.
+    # It uses get_credential_tier_name() to get the tier and resolve priority
+    # from the tier_priorities class attribute.
     # =========================================================================
     def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
         """

src/rotator_library/providers/provider_interface.py CHANGED Viewed

@@ -1,10 +1,46 @@
 from abc import ABC, abstractmethod
-from typing import List, Dict, Any, Optional, AsyncGenerator, Union
 import os
 import httpx
 import litellm
 class ProviderInterface(ABC):
     """
     An interface for API provider-specific functionality, including model
@@ -18,6 +54,40 @@ class ProviderInterface(ABC):
     # - "sequential": Use one credential until exhausted, then switch to next
     default_rotation_mode: str = "balanced"
     @abstractmethod
     async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
         """
@@ -87,28 +157,50 @@ class ProviderInterface(ABC):
         pass
     # [NEW] Credential Prioritization System
     def get_credential_priority(self, credential: str) -> Optional[int]:
         """
         Returns the priority level for a credential.
         Lower numbers = higher priority (1 is highest).
-        Returns None if provider doesn't use priorities.
-        This allows providers to auto-detect credential tiers (e.g., paid vs free)
-        and ensure higher-tier credentials are always tried first.
         Args:
             credential: The credential identifier (API key or path)
         Returns:
-            Priority level (1-10) or None if no priority system
-        Example:
-            For Gemini CLI:
-            - Paid tier credentials: priority 1 (highest)
-            - Free tier credentials: priority 2
-            - Unknown tier: priority 10 (lowest)
         """
-        return None
     def get_model_tier_requirement(self, model: str) -> Optional[int]:
         """
@@ -211,12 +303,76 @@ class ProviderInterface(ABC):
     # Per-Provider Usage Tracking Configuration
     # =========================================================================
     def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
         """
         Get provider-specific usage tracking configuration for a credential.
-        This allows providers to define custom usage reset windows based on
-        credential tier (e.g., paid vs free accounts with different quota periods).
         The UsageManager will use this configuration to:
         1. Track usage per-model or per-credential based on mode
@@ -231,7 +387,7 @@ class ProviderInterface(ABC):
             {
                 "window_seconds": int,     # Duration in seconds (e.g., 18000 for 5h)
                 "mode": str,               # "credential" or "per_model"
-                "priority": int,           # Priority level this config applies to
                 "description": str,        # Human-readable description (for logging)
             }
@@ -242,25 +398,9 @@ class ProviderInterface(ABC):
               from first request of THAT model. Models reset independently unless
               grouped. If a quota_exhausted error provides exact reset time, that
               becomes the authoritative reset time for the model.
-        Examples:
-            Antigravity paid tier (per-model):
-            {
-                "window_seconds": 18000,   # 5 hours
-                "mode": "per_model",
-                "priority": 1,
-                "description": "5-hour per-model window (paid tier)"
-            }
-            Default provider (credential-level):
-            {
-                "window_seconds": 86400,   # 24 hours
-                "mode": "credential",
-                "priority": 1,
-                "description": "24-hour credential window"
-            }
         """
-        return None  # Default: use daily reset at daily_reset_time_utc
     def get_default_usage_field_name(self) -> str:
         """
@@ -278,16 +418,68 @@ class ProviderInterface(ABC):
     # Model Quota Grouping
     # =========================================================================
     def get_model_quota_group(self, model: str) -> Optional[str]:
         """
         Returns the quota group name for a model, or None if not grouped.
         Models in the same quota group share cooldown timing - when one model
         hits a quota exhausted error, all models in the group get the same
         reset timestamp. They also reset (archive stats) together.
-        This is useful for providers where multiple model variants share the
-        same underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
         Args:
             model: Model name (with or without provider prefix)
@@ -295,12 +487,16 @@ class ProviderInterface(ABC):
         Returns:
             Group name string (e.g., "claude") or None if model is not grouped
         """
-        return None
     def get_models_in_quota_group(self, group: str) -> List[str]:
         """
         Returns all model names that belong to a quota group.
         Args:
             group: Group name (e.g., "claude")
@@ -308,4 +504,4 @@ class ProviderInterface(ABC):
             List of model names (WITHOUT provider prefix) in the group.
             Empty list if group doesn't exist.
         """
-        return []

 from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import List, Dict, Any, Optional, AsyncGenerator, Union, FrozenSet
 import os
 import httpx
 import litellm
+# =============================================================================
+# TIER & USAGE CONFIGURATION TYPES
+# =============================================================================
+@dataclass(frozen=True)
+class UsageResetConfigDef:
+    """
+    Immutable description of one usage-reset policy.
+    A provider lists these as class attributes so that credentials of
+    different tiers (paid vs free) can reset their usage stats differently.
+    Attributes:
+        window_seconds: Length of the usage tracking window, in seconds.
+        mode: "credential" for a single window per credential, or "per_model"
+              for a separate window per model (or model group).
+        description: Human-readable summary used for logging.
+        field_name: Key used in the usage data JSON structure; typically
+                    "models" with per_model mode and "daily" with credential mode.
+    """
+    # Window length in seconds
+    window_seconds: int
+    # Tracking granularity: "credential" or "per_model"
+    mode: str
+    # Free-text label for log output
+    description: str
+    # Falls back to "daily" for backwards compatibility
+    field_name: str = "daily"
+# Type aliases for provider configuration
+TierPriorityMap = Dict[str, int]  # tier_name -> priority
+UsageConfigKey = Union[FrozenSet[int], str]  # frozenset of priorities OR "default"
+UsageConfigMap = Dict[UsageConfigKey, UsageResetConfigDef]  # priority_set -> config
+QuotaGroupMap = Dict[str, List[str]]  # group_name -> [models]
 class ProviderInterface(ABC):
     """
     An interface for API provider-specific functionality, including model
     # - "sequential": Use one credential until exhausted, then switch to next
     default_rotation_mode: str = "balanced"
+    # =========================================================================
+    # TIER CONFIGURATION - Override in subclass
+    # =========================================================================
+    # Provider name for env var lookups (e.g., "antigravity", "gemini_cli")
+    # Upper-cased when building the key: QUOTA_GROUPS_{PROVIDER_ENV_NAME}_{GROUP}
+    provider_env_name: str = ""
+    # Tier name -> priority mapping (Single Source of Truth)
+    # Lower numbers = higher priority (1 is highest)
+    # Multiple tiers can map to the same priority
+    # Unknown tiers fall back to default_tier_priority
+    tier_priorities: TierPriorityMap = {}
+    # Default priority for tiers not in tier_priorities mapping
+    default_tier_priority: int = 10
+    # =========================================================================
+    # USAGE RESET CONFIGURATION - Override in subclass
+    # =========================================================================
+    # Usage reset configurations keyed by priority sets
+    # Keys: frozenset of priority values (e.g., frozenset({1, 2})) OR "default"
+    # The "default" key is used for any priority not matched by a frozenset
+    usage_reset_configs: UsageConfigMap = {}
+    # =========================================================================
+    # MODEL QUOTA GROUPS - Override in subclass
+    # =========================================================================
+    # Models that share quota/cooldown timing
+    # Can be overridden via env: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
+    model_quota_groups: QuotaGroupMap = {}
     @abstractmethod
     async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
         """
         pass
     # [NEW] Credential Prioritization System
+    # =========================================================================
+    # TIER RESOLUTION LOGIC (Centralized)
+    # =========================================================================
+    def _resolve_tier_priority(self, tier_name: Optional[str]) -> int:
+        """
+        Map a tier name to its numeric priority via the tier_priorities mapping.
+        Args:
+            tier_name: Tier identifier (e.g., "free-tier", "standard-tier"),
+                or None when no tier is known
+        Returns:
+            The mapped priority, or default_tier_priority when tier_name is
+            None or absent from tier_priorities.
+        """
+        fallback = self.default_tier_priority
+        if tier_name is not None:
+            return self.tier_priorities.get(tier_name, fallback)
+        return fallback
     def get_credential_priority(self, credential: str) -> Optional[int]:
         """
         Returns the priority level for a credential.
         Lower numbers = higher priority (1 is highest).
+        Returns None if tier not yet discovered.
+        Uses the provider's tier_priorities mapping to resolve priority from
+        tier name. Unknown tiers fall back to default_tier_priority.
+        Subclasses should:
+        1. Define tier_priorities dict with all known tier names
+        2. Override get_credential_tier_name() for tier lookup
+        Do NOT override this method.
         Args:
             credential: The credential identifier (API key or path)
         Returns:
+            Priority level (1-10) or None if tier not yet discovered
         """
+        tier = self.get_credential_tier_name(credential)
+        if tier is None:
+            return None  # Tier not yet discovered
+        return self._resolve_tier_priority(tier)
     def get_model_tier_requirement(self, model: str) -> Optional[int]:
         """
     # Per-Provider Usage Tracking Configuration
     # =========================================================================
+    # =========================================================================
+    # USAGE RESET CONFIG LOGIC (Centralized)
+    # =========================================================================
+    def _find_usage_config_for_priority(
+        self, priority: int
+    ) -> Optional[UsageResetConfigDef]:
+        """
+        Look up the usage reset definition that covers a priority level.
+        Entries keyed by a frozenset are consulted first and match when the
+        priority is a member of the set; if none match, the entry stored
+        under the "default" key is used.
+        Args:
+            priority: The credential priority level
+        Returns:
+            The matching UsageResetConfigDef, or None when nothing matches
+            and there is no "default" entry
+        """
+        for priority_set, definition in self.usage_reset_configs.items():
+            # Non-frozenset keys (the "default" string) are handled after the loop
+            if not isinstance(priority_set, frozenset):
+                continue
+            if priority in priority_set:
+                return definition
+        return self.usage_reset_configs.get("default")
+    def _build_usage_reset_config(
+        self, tier_name: Optional[str]
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Produce the usage reset configuration dict for a tier.
+        The tier is first resolved to a priority, and that priority is then
+        used to select the matching usage config definition.
+        Args:
+            tier_name: The tier name string
+        Returns:
+            Dict with window_seconds, mode, priority, description and
+            field_name, or None when usage_reset_configs is empty or no
+            definition applies to the resolved priority
+        """
+        if self.usage_reset_configs:
+            resolved_priority = self._resolve_tier_priority(tier_name)
+            definition = self._find_usage_config_for_priority(resolved_priority)
+            if definition is not None:
+                return {
+                    "window_seconds": definition.window_seconds,
+                    "mode": definition.mode,
+                    "priority": resolved_priority,
+                    "description": definition.description,
+                    "field_name": definition.field_name,
+                }
+        return None
     def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
         """
         Get provider-specific usage tracking configuration for a credential.
+        Uses the provider's usage_reset_configs class attribute to build
+        the configuration dict. Priority is auto-derived from tier.
+        Subclasses should define usage_reset_configs as a class attribute
+        instead of overriding this method. Only override get_credential_tier_name()
+        to provide the tier lookup mechanism.
         The UsageManager will use this configuration to:
         1. Track usage per-model or per-credential based on mode
             {
                 "window_seconds": int,     # Duration in seconds (e.g., 18000 for 5h)
                 "mode": str,               # "credential" or "per_model"
+                "priority": int,           # Priority level (auto-derived from tier)
                 "description": str,        # Human-readable description (for logging)
             }
               from first request of THAT model. Models reset independently unless
               grouped. If a quota_exhausted error provides exact reset time, that
               becomes the authoritative reset time for the model.
         """
+        tier = self.get_credential_tier_name(credential)
+        return self._build_usage_reset_config(tier)
     def get_default_usage_field_name(self) -> str:
         """
     # Model Quota Grouping
     # =========================================================================
+    # =========================================================================
+    # QUOTA GROUPS LOGIC (Centralized)
+    # =========================================================================
+    def _get_effective_quota_groups(self) -> QuotaGroupMap:
+        """
+        Get quota groups with .env overrides applied.
+        Starts from a copy of model_quota_groups, then applies every
+        environment variable named QUOTA_GROUPS_{PROVIDER}_{GROUP}, where
+        {PROVIDER} is provider_env_name upper-cased:
+        - a non-blank value replaces (or creates) the group with the
+          comma-separated model list;
+        - a blank value removes the group.
+        A group may be defined purely through the environment; it does not
+        need a default entry in model_quota_groups. Such a group is named
+        after the lower-cased {GROUP} suffix.
+        Returns:
+            A new dict of group_name -> model list. The class-level
+            model_quota_groups mapping is never returned or mutated.
+        """
+        # Copy the defaults so callers can never mutate the shared class attribute
+        effective: QuotaGroupMap = {
+            name: list(models) for name, models in self.model_quota_groups.items()
+        }
+        if not self.provider_env_name:
+            # Without a provider name there is no env key to look up
+            return effective
+        prefix = f"QUOTA_GROUPS_{self.provider_env_name.upper()}_"
+        # Upper-cased group name -> declared spelling, so overriding a default
+        # group keeps the name used in model_quota_groups
+        declared = {name.upper(): name for name in effective}
+        # NOTE(review): a provider whose env name is a prefix of another's
+        # (e.g. "gemini" vs "gemini_cli") would also match the longer
+        # provider's variables - confirm no such pair exists
+        # Sorted so env-only groups are added in a deterministic order
+        for env_key, env_value in sorted(os.environ.items()):
+            if not env_key.startswith(prefix):
+                continue
+            suffix = env_key[len(prefix):]
+            if not suffix:
+                continue
+            group_name = declared.get(suffix.upper(), suffix.lower())
+            if env_value.strip():
+                # Parse comma-separated models
+                effective[group_name] = [
+                    m.strip() for m in env_value.split(",") if m.strip()
+                ]
+            else:
+                # Blank value = group disabled
+                effective.pop(group_name, None)
+        return effective
+    def _find_model_quota_group(self, model: str) -> Optional[str]:
+        """Return the name of the first effective quota group listing the model, else None."""
+        effective = self._get_effective_quota_groups()
+        return next(
+            (name for name, members in effective.items() if model in members),
+            None,
+        )
+    def _get_quota_group_models(self, group: str) -> List[str]:
+        """Return the models of an effective quota group, or an empty list if the group is absent."""
+        effective = self._get_effective_quota_groups()
+        if group in effective:
+            return effective[group]
+        return []
     def get_model_quota_group(self, model: str) -> Optional[str]:
         """
         Returns the quota group name for a model, or None if not grouped.
+        Uses the provider's model_quota_groups class attribute with .env overrides
+        via QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2".
         Models in the same quota group share cooldown timing - when one model
         hits a quota exhausted error, all models in the group get the same
         reset timestamp. They also reset (archive stats) together.
+        Subclasses should define model_quota_groups as a class attribute
+        instead of overriding this method.
         Args:
             model: Model name (with or without provider prefix)
         Returns:
             Group name string (e.g., "claude") or None if model is not grouped
         """
+        # Strip provider prefix if present
+        clean_model = model.split("/")[-1] if "/" in model else model
+        return self._find_model_quota_group(clean_model)
     def get_models_in_quota_group(self, group: str) -> List[str]:
         """
         Returns all model names that belong to a quota group.
+        Uses the provider's model_quota_groups class attribute with .env overrides.
         Args:
             group: Group name (e.g., "claude")
             List of model names (WITHOUT provider prefix) in the group.
             Empty list if group doesn't exist.
         """
+        return self._get_quota_group_models(group)