Mirrowel committed on
Commit
3c52746
·
1 Parent(s): fd01482

refactor(providers): 🔨 centralize tier and quota configuration in ProviderInterface

Browse files

Consolidate provider-specific tier prioritization, usage reset configuration, and quota group logic into the base ProviderInterface class to eliminate code duplication and establish a single source of truth.

- Introduce UsageResetConfigDef dataclass for declarative usage configuration
- Add tier_priorities, usage_reset_configs, and model_quota_groups as class attributes
- Implement centralized _resolve_tier_priority() and _build_usage_reset_config() methods
- Move get_credential_priority() and get_usage_reset_config() logic to base class
- Add environment variable override support for quota groups (QUOTA_GROUPS_{PROVIDER}_{GROUP})
- Remove duplicate priority/usage logic from AntigravityProvider and GeminiCliProvider
- Update .env.example with comprehensive documentation for quota group configuration

This refactoring allows providers to define their tier system, usage windows, and quota groups purely through class attributes, while the base class handles all resolution logic. Providers now only need to override get_credential_tier_name() for tier lookup.

.env.example CHANGED
@@ -185,6 +185,26 @@ MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
185
  # ROTATION_MODE_GEMINI=balanced
186
  # ROTATION_MODE_ANTIGRAVITY=sequential
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  # ------------------------------------------------------------------------------
189
  # | [ADVANCED] Proxy Configuration |
190
  # ------------------------------------------------------------------------------
 
185
  # ROTATION_MODE_GEMINI=balanced
186
  # ROTATION_MODE_ANTIGRAVITY=sequential
187
 
188
+ # --- Model Quota Groups ---
189
+ # Models that share quota/cooldown timing. When one model in a group hits
190
+ # quota exhausted (429), all models in the group receive the same cooldown timestamp.
191
+ # They also reset (archive stats) together when the quota period expires.
192
+ #
193
+ # This is useful for providers where multiple model variants share the same
194
+ # underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
195
+ #
196
+ # Format: QUOTA_GROUPS_<PROVIDER>_<GROUP>="model1,model2,model3"
197
+ #
198
+ # To DISABLE a default group, set it to empty string:
199
+ # QUOTA_GROUPS_ANTIGRAVITY_CLAUDE=""
200
+ #
201
+ # Default groups:
202
+ # ANTIGRAVITY.CLAUDE: claude-sonnet-4-5,claude-opus-4-5 (currently commented out in the provider, so not active by default)
203
+ #
204
+ # Examples:
205
+ # QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
206
+ # QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
207
+
208
  # ------------------------------------------------------------------------------
209
  # | [ADVANCED] Proxy Configuration |
210
  # ------------------------------------------------------------------------------
src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -34,7 +34,7 @@ from urllib.parse import urlparse
34
  import httpx
35
  import litellm
36
 
37
- from .provider_interface import ProviderInterface
38
  from .antigravity_auth_base import AntigravityAuthBase
39
  from .provider_cache import ProviderCache
40
  from ..model_definitions import ModelDefinitions
@@ -497,6 +497,52 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
497
  # Sequential mode by default - preserves thinking signature caches between requests
498
  default_rotation_mode: str = "sequential"
499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  @staticmethod
501
  def parse_quota_error(
502
  error: Exception, error_body: Optional[str] = None
@@ -733,43 +779,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
733
  f"claude_fix={self._enable_claude_tool_fix}, thinking_sanitization={self._enable_thinking_sanitization}"
734
  )
735
 
736
- # =========================================================================
737
- # CREDENTIAL PRIORITIZATION
738
- # =========================================================================
739
-
740
- def get_credential_priority(self, credential: str) -> Optional[int]:
741
- """
742
- Returns priority based on Antigravity tier.
743
- Paid tiers: priority 1 (highest)
744
- Free tier: priority 2
745
- Legacy/Unknown: priority 10 (lowest)
746
-
747
- Args:
748
- credential: The credential path
749
-
750
- Returns:
751
- Priority level (1-10) or None if tier not yet discovered
752
- """
753
- tier = self.project_tier_cache.get(credential)
754
-
755
- # Lazy load from file if not in cache
756
- if not tier:
757
- tier = self._load_tier_from_file(credential)
758
-
759
- if not tier:
760
- return None # Not yet discovered
761
-
762
- # Paid tiers get highest priority
763
- if tier not in ["free-tier", "legacy-tier", "unknown"]:
764
- return 1
765
-
766
- # Free tier gets lower priority
767
- if tier == "free-tier":
768
- return 2
769
-
770
- # Legacy and unknown get even lower
771
- return 10
772
-
773
  def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
774
  """
775
  Load tier from credential file's _proxy_metadata and cache it.
@@ -837,105 +846,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
837
  """
838
  return None
839
 
840
- def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
841
- """
842
- Get Antigravity-specific usage tracking configuration based on credential tier.
843
-
844
- Antigravity uses per-model windows with different durations by tier:
845
- - Paid tiers (priority 1): 5-hour per-model window
846
- - Free tier (priority 2): 7-day per-model window
847
- - Unknown/legacy: 7-day per-model window (conservative default)
848
-
849
- When a model hits a quota_exhausted 429 error with exact reset timestamp,
850
- that timestamp becomes the authoritative reset time for the model (and its group).
851
-
852
- Args:
853
- credential: The credential path
854
-
855
- Returns:
856
- Usage reset configuration dict with mode="per_model"
857
- """
858
- tier = self.project_tier_cache.get(credential)
859
- if not tier:
860
- tier = self._load_tier_from_file(credential)
861
-
862
- # Paid tiers: 5-hour per-model window
863
- if tier and tier not in ["free-tier", "legacy-tier", "unknown"]:
864
- return {
865
- "window_seconds": 5 * 60 * 60, # 18000 seconds = 5 hours
866
- "mode": "per_model",
867
- "priority": 1,
868
- "description": "5-hour per-model window (paid tier)",
869
- }
870
-
871
- # Free tier: 7-day per-model window
872
- if tier == "free-tier":
873
- return {
874
- "window_seconds": 7 * 24 * 60 * 60, # 604800 seconds = 7 days
875
- "mode": "per_model",
876
- "priority": 2,
877
- "description": "7-day per-model window (free tier)",
878
- }
879
-
880
- # Unknown/legacy: use 7-day per-model window as conservative default
881
- return {
882
- "window_seconds": 7 * 24 * 60 * 60, # 604800 seconds = 7 days
883
- "mode": "per_model",
884
- "priority": 10,
885
- "description": "7-day per-model window (unknown tier - conservative default)",
886
- }
887
-
888
- def get_default_usage_field_name(self) -> str:
889
- """
890
- Get the default usage tracking field name for Antigravity.
891
-
892
- Returns:
893
- "models" for per-model tracking
894
- """
895
- return "models"
896
-
897
- # =========================================================================
898
- # Model Quota Grouping
899
- # =========================================================================
900
-
901
- # Models that share quota timing - when one hits quota, all get same reset time
902
- QUOTA_GROUPS = {
903
- # Future: add claude/gemini groups if they share quota
904
- }
905
-
906
- def get_model_quota_group(self, model: str) -> Optional[str]:
907
- """
908
- Returns the quota group name for a model.
909
-
910
- Claude models (sonnet and opus) share quota on Antigravity.
911
- When one hits quota exhausted, all models in the group get the same reset time.
912
-
913
- Args:
914
- model: Model name (with or without "antigravity/" prefix)
915
-
916
- Returns:
917
- Group name ("claude") or None if not grouped
918
- """
919
- # Remove provider prefix if present
920
- clean_model = model.replace("antigravity/", "")
921
-
922
- for group_name, models in self.QUOTA_GROUPS.items():
923
- if clean_model in models:
924
- return group_name
925
- return None
926
-
927
- def get_models_in_quota_group(self, group: str) -> List[str]:
928
- """
929
- Returns all model names in a quota group.
930
-
931
- Args:
932
- group: Group name (e.g., "claude")
933
-
934
- Returns:
935
- List of model names (without provider prefix)
936
- """
937
- return self.QUOTA_GROUPS.get(group, [])
938
-
939
  async def initialize_credentials(self, credential_paths: List[str]) -> None:
940
  """
941
  Load persisted tier information from credential files at startup.
 
34
  import httpx
35
  import litellm
36
 
37
+ from .provider_interface import ProviderInterface, UsageResetConfigDef, QuotaGroupMap
38
  from .antigravity_auth_base import AntigravityAuthBase
39
  from .provider_cache import ProviderCache
40
  from ..model_definitions import ModelDefinitions
 
497
  # Sequential mode by default - preserves thinking signature caches between requests
498
  default_rotation_mode: str = "sequential"
499
 
500
+ # =========================================================================
501
+ # TIER & USAGE CONFIGURATION
502
+ # =========================================================================
503
+
504
+ # Provider name for env var lookups (QUOTA_GROUPS_ANTIGRAVITY_*)
505
+ provider_env_name: str = "antigravity"
506
+
507
+ # Tier name -> priority mapping (Single Source of Truth)
508
+ # Lower numbers = higher priority
509
+ tier_priorities = {
510
+ # Priority 1: Highest paid tier (Google AI Ultra - name unconfirmed)
511
+ # "google-ai-ultra": 1, # Uncomment when tier name is confirmed
512
+ # Priority 2: Standard paid tier
513
+ "standard-tier": 2,
514
+ # Priority 3: Free tier
515
+ "free-tier": 3,
516
+ # Priority 10: Legacy/Unknown (lowest)
517
+ "legacy-tier": 10,
518
+ "unknown": 10,
519
+ }
520
+
521
+ # Default priority for tiers not in the mapping
522
+ default_tier_priority: int = 10
523
+
524
+ # Usage reset configs keyed by priority sets
525
+ # Priorities 1-2 (paid tiers) get 5h window, others get 7d window
526
+ usage_reset_configs = {
527
+ frozenset({1, 2}): UsageResetConfigDef(
528
+ window_seconds=5 * 60 * 60, # 5 hours
529
+ mode="per_model",
530
+ description="5-hour per-model window (paid tier)",
531
+ field_name="models",
532
+ ),
533
+ "default": UsageResetConfigDef(
534
+ window_seconds=7 * 24 * 60 * 60, # 7 days
535
+ mode="per_model",
536
+ description="7-day per-model window (free/unknown tier)",
537
+ field_name="models",
538
+ ),
539
+ }
540
+
541
+ # Model quota groups (each group listed here can be overridden via QUOTA_GROUPS_ANTIGRAVITY_<GROUP>; none are enabled by default)
542
+ model_quota_groups: QuotaGroupMap = {
543
+ # "claude": ["claude-sonnet-4-5", "claude-opus-4-5"],
544
+ }
545
+
546
  @staticmethod
547
  def parse_quota_error(
548
  error: Exception, error_body: Optional[str] = None
 
779
  f"claude_fix={self._enable_claude_tool_fix}, thinking_sanitization={self._enable_thinking_sanitization}"
780
  )
781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
783
  """
784
  Load tier from credential file's _proxy_metadata and cache it.
 
846
  """
847
  return None
848
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849
  async def initialize_credentials(self, credential_paths: List[str]) -> None:
850
  """
851
  Load persisted tier information from credential files at startup.
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -189,6 +189,36 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
189
  # Balanced by default - Gemini CLI has short cooldowns (seconds, not hours)
190
  default_rotation_mode: str = "balanced"
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  @staticmethod
193
  def parse_quota_error(
194
  error: Exception, error_body: Optional[str] = None
@@ -264,41 +294,13 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
264
  )
265
 
266
  # =========================================================================
267
- # CREDENTIAL PRIORITIZATION
 
 
 
 
 
268
  # =========================================================================
269
-
270
- def get_credential_priority(self, credential: str) -> Optional[int]:
271
- """
272
- Returns priority based on Gemini tier.
273
- Paid tiers: priority 1 (highest)
274
- Free/Legacy tiers: priority 2
275
- Unknown: priority 10 (lowest)
276
-
277
- Args:
278
- credential: The credential path
279
-
280
- Returns:
281
- Priority level (1-10) or None if tier not yet discovered
282
- """
283
- tier = self.project_tier_cache.get(credential)
284
-
285
- # Lazy load from file if not in cache
286
- if not tier:
287
- tier = self._load_tier_from_file(credential)
288
-
289
- if not tier:
290
- return None # Not yet discovered
291
-
292
- # Paid tiers get highest priority
293
- if tier not in ["free-tier", "legacy-tier", "unknown"]:
294
- return 1
295
-
296
- # Free tier gets lower priority
297
- if tier == "free-tier":
298
- return 2
299
-
300
- # Legacy and unknown get even lower
301
- return 10
302
 
303
  def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
304
  """
 
189
  # Balanced by default - Gemini CLI has short cooldowns (seconds, not hours)
190
  default_rotation_mode: str = "balanced"
191
 
192
+ # =========================================================================
193
+ # TIER CONFIGURATION
194
+ # =========================================================================
195
+
196
+ # Provider name for env var lookups (QUOTA_GROUPS_GEMINI_CLI_*)
197
+ provider_env_name: str = "gemini_cli"
198
+
199
+ # Tier name -> priority mapping (Single Source of Truth)
200
+ # Same tier names as Antigravity (coincidentally), but defined separately
201
+ tier_priorities = {
202
+ # Priority 1: Highest paid tier (Google AI Ultra - name unconfirmed)
203
+ # "google-ai-ultra": 1, # Uncomment when tier name is confirmed
204
+ # Priority 2: Standard paid tier
205
+ "standard-tier": 2,
206
+ # Priority 3: Free tier
207
+ "free-tier": 3,
208
+ # Priority 10: Legacy/Unknown (lowest)
209
+ "legacy-tier": 10,
210
+ "unknown": 10,
211
+ }
212
+
213
+ # Default priority for tiers not in the mapping
214
+ default_tier_priority: int = 10
215
+
216
+ # Gemini CLI uses default daily reset - no custom usage_reset_configs
217
+ # (Empty dict means inherited get_usage_reset_config returns None)
218
+
219
+ # No quota groups defined for Gemini CLI
220
+ # (Models don't share quotas)
221
+
222
  @staticmethod
223
  def parse_quota_error(
224
  error: Exception, error_body: Optional[str] = None
 
294
  )
295
 
296
  # =========================================================================
297
+ # CREDENTIAL TIER LOOKUP (Provider-specific - uses cache)
298
+ # =========================================================================
299
+ #
300
+ # NOTE: get_credential_priority() is now inherited from ProviderInterface.
301
+ # It uses get_credential_tier_name() to get the tier and resolve priority
302
+ # from the tier_priorities class attribute.
303
  # =========================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
  def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
306
  """
src/rotator_library/providers/provider_interface.py CHANGED
@@ -1,10 +1,46 @@
1
  from abc import ABC, abstractmethod
2
- from typing import List, Dict, Any, Optional, AsyncGenerator, Union
 
3
  import os
4
  import httpx
5
  import litellm
6
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  class ProviderInterface(ABC):
9
  """
10
  An interface for API provider-specific functionality, including model
@@ -18,6 +54,40 @@ class ProviderInterface(ABC):
18
  # - "sequential": Use one credential until exhausted, then switch to next
19
  default_rotation_mode: str = "balanced"
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  @abstractmethod
22
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
23
  """
@@ -87,28 +157,50 @@ class ProviderInterface(ABC):
87
  pass
88
 
89
  # [NEW] Credential Prioritization System
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def get_credential_priority(self, credential: str) -> Optional[int]:
91
  """
92
  Returns the priority level for a credential.
93
  Lower numbers = higher priority (1 is highest).
94
- Returns None if provider doesn't use priorities.
 
 
 
95
 
96
- This allows providers to auto-detect credential tiers (e.g., paid vs free)
97
- and ensure higher-tier credentials are always tried first.
 
 
98
 
99
  Args:
100
  credential: The credential identifier (API key or path)
101
 
102
  Returns:
103
- Priority level (1-10) or None if no priority system
104
-
105
- Example:
106
- For Gemini CLI:
107
- - Paid tier credentials: priority 1 (highest)
108
- - Free tier credentials: priority 2
109
- - Unknown tier: priority 10 (lowest)
110
  """
111
- return None
 
 
 
112
 
113
  def get_model_tier_requirement(self, model: str) -> Optional[int]:
114
  """
@@ -211,12 +303,76 @@ class ProviderInterface(ABC):
211
  # Per-Provider Usage Tracking Configuration
212
  # =========================================================================
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
215
  """
216
  Get provider-specific usage tracking configuration for a credential.
217
 
218
- This allows providers to define custom usage reset windows based on
219
- credential tier (e.g., paid vs free accounts with different quota periods).
 
 
 
 
220
 
221
  The UsageManager will use this configuration to:
222
  1. Track usage per-model or per-credential based on mode
@@ -231,7 +387,7 @@ class ProviderInterface(ABC):
231
  {
232
  "window_seconds": int, # Duration in seconds (e.g., 18000 for 5h)
233
  "mode": str, # "credential" or "per_model"
234
- "priority": int, # Priority level this config applies to
235
  "description": str, # Human-readable description (for logging)
236
  }
237
 
@@ -242,25 +398,9 @@ class ProviderInterface(ABC):
242
  from first request of THAT model. Models reset independently unless
243
  grouped. If a quota_exhausted error provides exact reset time, that
244
  becomes the authoritative reset time for the model.
245
-
246
- Examples:
247
- Antigravity paid tier (per-model):
248
- {
249
- "window_seconds": 18000, # 5 hours
250
- "mode": "per_model",
251
- "priority": 1,
252
- "description": "5-hour per-model window (paid tier)"
253
- }
254
-
255
- Default provider (credential-level):
256
- {
257
- "window_seconds": 86400, # 24 hours
258
- "mode": "credential",
259
- "priority": 1,
260
- "description": "24-hour credential window"
261
- }
262
  """
263
- return None # Default: use daily reset at daily_reset_time_utc
 
264
 
265
  def get_default_usage_field_name(self) -> str:
266
  """
@@ -278,16 +418,68 @@ class ProviderInterface(ABC):
278
  # Model Quota Grouping
279
  # =========================================================================
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  def get_model_quota_group(self, model: str) -> Optional[str]:
282
  """
283
  Returns the quota group name for a model, or None if not grouped.
284
 
 
 
 
285
  Models in the same quota group share cooldown timing - when one model
286
  hits a quota exhausted error, all models in the group get the same
287
  reset timestamp. They also reset (archive stats) together.
288
 
289
- This is useful for providers where multiple model variants share the
290
- same underlying quota (e.g., Claude Sonnet and Opus on Antigravity).
291
 
292
  Args:
293
  model: Model name (with or without provider prefix)
@@ -295,12 +487,16 @@ class ProviderInterface(ABC):
295
  Returns:
296
  Group name string (e.g., "claude") or None if model is not grouped
297
  """
298
- return None
 
 
299
 
300
  def get_models_in_quota_group(self, group: str) -> List[str]:
301
  """
302
  Returns all model names that belong to a quota group.
303
 
 
 
304
  Args:
305
  group: Group name (e.g., "claude")
306
 
@@ -308,4 +504,4 @@ class ProviderInterface(ABC):
308
  List of model names (WITHOUT provider prefix) in the group.
309
  Empty list if group doesn't exist.
310
  """
311
- return []
 
1
  from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
3
+ from typing import List, Dict, Any, Optional, AsyncGenerator, Union, FrozenSet
4
  import os
5
  import httpx
6
  import litellm
7
 
8
 
9
+ # =============================================================================
10
+ # TIER & USAGE CONFIGURATION TYPES
11
+ # =============================================================================
12
+
13
+
14
+ @dataclass(frozen=True)
15
+ class UsageResetConfigDef:
16
+ """
17
+ Definition for usage reset configuration per tier type.
18
+
19
+ Providers define these as class attributes to specify how usage stats
20
+ should reset based on credential tier (paid vs free).
21
+
22
+ Attributes:
23
+ window_seconds: Duration of the usage tracking window in seconds.
24
+ mode: Either "credential" (one window per credential) or "per_model"
25
+ (separate window per model or model group).
26
+ description: Human-readable description for logging.
27
+ field_name: The key used in usage data JSON structure.
28
+ Typically "models" for per_model mode, "daily" for credential mode.
29
+ """
30
+
31
+ window_seconds: int
32
+ mode: str # "credential" or "per_model"
33
+ description: str
34
+ field_name: str = "daily" # Default for backwards compatibility
35
+
36
+
37
+ # Type aliases for provider configuration
38
+ TierPriorityMap = Dict[str, int] # tier_name -> priority
39
+ UsageConfigKey = Union[FrozenSet[int], str] # frozenset of priorities OR "default"
40
+ UsageConfigMap = Dict[UsageConfigKey, UsageResetConfigDef] # priority_set -> config
41
+ QuotaGroupMap = Dict[str, List[str]] # group_name -> [models]
42
+
43
+
44
  class ProviderInterface(ABC):
45
  """
46
  An interface for API provider-specific functionality, including model
 
54
  # - "sequential": Use one credential until exhausted, then switch to next
55
  default_rotation_mode: str = "balanced"
56
 
57
+ # =========================================================================
58
+ # TIER CONFIGURATION - Override in subclass
59
+ # =========================================================================
60
+
61
+ # Provider name for env var lookups (e.g., "antigravity", "gemini_cli")
62
+ # Used (upper-cased) for: QUOTA_GROUPS_{provider_env_name}_{GROUP}
63
+ provider_env_name: str = ""
64
+
65
+ # Tier name -> priority mapping (Single Source of Truth)
66
+ # Lower numbers = higher priority (1 is highest)
67
+ # Multiple tiers can map to the same priority
68
+ # Unknown tiers fall back to default_tier_priority
69
+ tier_priorities: TierPriorityMap = {}
70
+
71
+ # Default priority for tiers not in tier_priorities mapping
72
+ default_tier_priority: int = 10
73
+
74
+ # =========================================================================
75
+ # USAGE RESET CONFIGURATION - Override in subclass
76
+ # =========================================================================
77
+
78
+ # Usage reset configurations keyed by priority sets
79
+ # Keys: frozenset of priority values (e.g., frozenset({1, 2})) OR "default"
80
+ # The "default" key is used for any priority not matched by a frozenset
81
+ usage_reset_configs: UsageConfigMap = {}
82
+
83
+ # =========================================================================
84
+ # MODEL QUOTA GROUPS - Override in subclass
85
+ # =========================================================================
86
+
87
+ # Models that share quota/cooldown timing
88
+ # Groups defined here can be overridden via env: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
89
+ model_quota_groups: QuotaGroupMap = {}
90
+
91
  @abstractmethod
92
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
93
  """
 
157
  pass
158
 
159
  # [NEW] Credential Prioritization System
160
+
161
+ # =========================================================================
162
+ # TIER RESOLUTION LOGIC (Centralized)
163
+ # =========================================================================
164
+
165
+ def _resolve_tier_priority(self, tier_name: Optional[str]) -> int:
166
+ """
167
+ Resolve priority for a tier name using provider's tier_priorities mapping.
168
+
169
+ Args:
170
+ tier_name: The tier name string (e.g., "free-tier", "standard-tier")
171
+
172
+ Returns:
173
+ Priority level from tier_priorities, or default_tier_priority if
174
+ tier_name is None or not found in the mapping.
175
+ """
176
+ if tier_name is None:
177
+ return self.default_tier_priority
178
+ return self.tier_priorities.get(tier_name, self.default_tier_priority)
179
+
180
  def get_credential_priority(self, credential: str) -> Optional[int]:
181
  """
182
  Returns the priority level for a credential.
183
  Lower numbers = higher priority (1 is highest).
184
+ Returns None if tier not yet discovered.
185
+
186
+ Uses the provider's tier_priorities mapping to resolve priority from
187
+ tier name. Unknown tiers fall back to default_tier_priority.
188
 
189
+ Subclasses should:
190
+ 1. Define tier_priorities dict with all known tier names
191
+ 2. Override get_credential_tier_name() for tier lookup
192
+ Do NOT override this method.
193
 
194
  Args:
195
  credential: The credential identifier (API key or path)
196
 
197
  Returns:
198
+ Priority level (1-10) or None if tier not yet discovered
 
 
 
 
 
 
199
  """
200
+ tier = self.get_credential_tier_name(credential)
201
+ if tier is None:
202
+ return None # Tier not yet discovered
203
+ return self._resolve_tier_priority(tier)
204
 
205
  def get_model_tier_requirement(self, model: str) -> Optional[int]:
206
  """
 
303
  # Per-Provider Usage Tracking Configuration
304
  # =========================================================================
305
 
306
+ # =========================================================================
307
+ # USAGE RESET CONFIG LOGIC (Centralized)
308
+ # =========================================================================
309
+
310
+ def _find_usage_config_for_priority(
311
+ self, priority: int
312
+ ) -> Optional[UsageResetConfigDef]:
313
+ """
314
+ Find usage config that applies to a priority value.
315
+
316
+ Checks frozenset keys first (priority must be in the set),
317
+ then falls back to "default" key if no match found.
318
+
319
+ Args:
320
+ priority: The credential priority level
321
+
322
+ Returns:
323
+ UsageResetConfigDef if found, None otherwise
324
+ """
325
+ # First, check frozenset keys for explicit priority match
326
+ for key, config in self.usage_reset_configs.items():
327
+ if isinstance(key, frozenset) and priority in key:
328
+ return config
329
+
330
+ # Fall back to "default" key
331
+ return self.usage_reset_configs.get("default")
332
+
333
+ def _build_usage_reset_config(
334
+ self, tier_name: Optional[str]
335
+ ) -> Optional[Dict[str, Any]]:
336
+ """
337
+ Build usage reset configuration dict for a tier.
338
+
339
+ Resolves tier to priority, then finds matching usage config.
340
+ Returns None if provider doesn't define usage_reset_configs.
341
+
342
+ Args:
343
+ tier_name: The tier name string
344
+
345
+ Returns:
346
+ Usage config dict with window_seconds, mode, priority, description,
347
+ field_name, or None if no config applies
348
+ """
349
+ if not self.usage_reset_configs:
350
+ return None
351
+
352
+ priority = self._resolve_tier_priority(tier_name)
353
+ config = self._find_usage_config_for_priority(priority)
354
+
355
+ if config is None:
356
+ return None
357
+
358
+ return {
359
+ "window_seconds": config.window_seconds,
360
+ "mode": config.mode,
361
+ "priority": priority,
362
+ "description": config.description,
363
+ "field_name": config.field_name,
364
+ }
365
+
366
  def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
367
  """
368
  Get provider-specific usage tracking configuration for a credential.
369
 
370
+ Uses the provider's usage_reset_configs class attribute to build
371
+ the configuration dict. Priority is auto-derived from tier.
372
+
373
+ Subclasses should define usage_reset_configs as a class attribute
374
+ instead of overriding this method. Only override get_credential_tier_name()
375
+ to provide the tier lookup mechanism.
376
 
377
  The UsageManager will use this configuration to:
378
  1. Track usage per-model or per-credential based on mode
 
387
  {
388
  "window_seconds": int, # Duration in seconds (e.g., 18000 for 5h)
389
  "mode": str, # "credential" or "per_model"
390
+ "priority": int, # Priority level (auto-derived from tier)
391
  "description": str, # Human-readable description (for logging)
392
  }
393
 
 
398
  from first request of THAT model. Models reset independently unless
399
  grouped. If a quota_exhausted error provides exact reset time, that
400
  becomes the authoritative reset time for the model.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  """
402
+ tier = self.get_credential_tier_name(credential)
403
+ return self._build_usage_reset_config(tier)
404
 
405
  def get_default_usage_field_name(self) -> str:
406
  """
 
418
  # Model Quota Grouping
419
  # =========================================================================
420
 
421
+ # =========================================================================
422
+ # QUOTA GROUPS LOGIC (Centralized)
423
+ # =========================================================================
424
+
425
+ def _get_effective_quota_groups(self) -> QuotaGroupMap:
426
+ """
427
+ Get quota groups with .env overrides applied.
428
+
429
+ Env format: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
430
+ Set empty string to disable a default group.
431
+ """
432
+ if not self.provider_env_name or not self.model_quota_groups:
433
+ return self.model_quota_groups
434
+
435
+ result: QuotaGroupMap = {}
436
+
437
+ for group_name, default_models in self.model_quota_groups.items():
438
+ env_key = (
439
+ f"QUOTA_GROUPS_{self.provider_env_name.upper()}_{group_name.upper()}"
440
+ )
441
+ env_value = os.getenv(env_key)
442
+
443
+ if env_value is not None:
444
+ # Env override present
445
+ if env_value.strip():
446
+ # Parse comma-separated models
447
+ result[group_name] = [
448
+ m.strip() for m in env_value.split(",") if m.strip()
449
+ ]
450
+ # Empty string = group disabled, don't add to result
451
+ else:
452
+ # Use default
453
+ result[group_name] = list(default_models)
454
+
455
+ return result
456
+
457
+ def _find_model_quota_group(self, model: str) -> Optional[str]:
458
+ """Find which quota group a model belongs to."""
459
+ groups = self._get_effective_quota_groups()
460
+ for group_name, models in groups.items():
461
+ if model in models:
462
+ return group_name
463
+ return None
464
+
465
+ def _get_quota_group_models(self, group: str) -> List[str]:
466
+ """Get all models in a quota group."""
467
+ groups = self._get_effective_quota_groups()
468
+ return groups.get(group, [])
469
+
470
  def get_model_quota_group(self, model: str) -> Optional[str]:
471
  """
472
  Returns the quota group name for a model, or None if not grouped.
473
 
474
+ Uses the provider's model_quota_groups class attribute with .env overrides
475
+ via QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2".
476
+
477
  Models in the same quota group share cooldown timing - when one model
478
  hits a quota exhausted error, all models in the group get the same
479
  reset timestamp. They also reset (archive stats) together.
480
 
481
+ Subclasses should define model_quota_groups as a class attribute
482
+ instead of overriding this method.
483
 
484
  Args:
485
  model: Model name (with or without provider prefix)
 
487
  Returns:
488
  Group name string (e.g., "claude") or None if model is not grouped
489
  """
490
+ # Strip provider prefix if present
491
+ clean_model = model.split("/")[-1] if "/" in model else model
492
+ return self._find_model_quota_group(clean_model)
493
 
494
  def get_models_in_quota_group(self, group: str) -> List[str]:
495
  """
496
  Returns all model names that belong to a quota group.
497
 
498
+ Uses the provider's model_quota_groups class attribute with .env overrides.
499
+
500
  Args:
501
  group: Group name (e.g., "claude")
502
 
 
504
  List of model names (WITHOUT provider prefix) in the group.
505
  Empty list if group doesn't exist.
506
  """
507
+ return self._get_quota_group_models(group)