Mirrowel committed on
Commit
b5da45c
·
1 Parent(s): 8a839ed

feat(client): ✨ add credential prioritization system for tier-based model access

Browse files

Implements a comprehensive credential prioritization system that enables providers to enforce tier-based access controls and optimize credential selection based on account types.

Key changes:

- Added `get_credential_priority()` and `get_model_tier_requirement()` methods to ProviderInterface, allowing providers to define credential tiers and model restrictions
- Enhanced UsageManager.acquire_key() to respect credential priorities, always attempting highest-priority credentials first before falling back to lower tiers
- Implemented Gemini-specific tier detection in GeminiCliProvider, mapping paid-tier credentials to priority 1, free tier to priority 2, and legacy/unknown tiers to priority 10
- Added model-based filtering in RotatingClient to exclude incompatible credentials before acquisition (e.g., Gemini 3 models require paid-tier credentials)
- Improved logging to show priority-aware credential selection and tier compatibility warnings

The system gracefully handles unknown credential tiers by treating them as potentially compatible until their actual tier is discovered on first use. Within each priority level, load balancing by usage count is preserved.

src/rotator_library/client.py CHANGED
@@ -672,6 +672,73 @@ class RotatingClient:
672
  lib_logger.info(f"Resolved model '{model}' to '{resolved_model}'")
673
  model = resolved_model
674
  kwargs["model"] = model # Ensure kwargs has the resolved model for litellm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
675
 
676
  while (
677
  len(tried_creds) < len(credentials_for_provider) and time.time() < deadline
@@ -710,7 +777,8 @@ class RotatingClient:
710
  max_concurrent = self.max_concurrent_requests_per_key.get(provider, 1)
711
  current_cred = await self.usage_manager.acquire_key(
712
  available_keys=creds_to_try, model=model, deadline=deadline,
713
- max_concurrent=max_concurrent
 
714
  )
715
  key_acquired = True
716
  tried_creds.add(current_cred)
@@ -1047,6 +1115,73 @@ class RotatingClient:
1047
  lib_logger.info(f"Resolved model '{model}' to '{resolved_model}'")
1048
  model = resolved_model
1049
  kwargs["model"] = model # Ensure kwargs has the resolved model for litellm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1050
 
1051
  try:
1052
  while (
@@ -1086,7 +1221,8 @@ class RotatingClient:
1086
  max_concurrent = self.max_concurrent_requests_per_key.get(provider, 1)
1087
  current_cred = await self.usage_manager.acquire_key(
1088
  available_keys=creds_to_try, model=model, deadline=deadline,
1089
- max_concurrent=max_concurrent
 
1090
  )
1091
  key_acquired = True
1092
  tried_creds.add(current_cred)
 
672
  lib_logger.info(f"Resolved model '{model}' to '{resolved_model}'")
673
  model = resolved_model
674
  kwargs["model"] = model # Ensure kwargs has the resolved model for litellm
675
+
676
+ # [NEW] Filter by model tier requirement and build priority map
677
+ credential_priorities = None
678
+ if provider_plugin and hasattr(provider_plugin, 'get_model_tier_requirement'):
679
+ required_tier = provider_plugin.get_model_tier_requirement(model)
680
+ if required_tier is not None:
681
+ # Filter OUT only credentials we KNOW are too low priority
682
+ # Keep credentials with unknown priority (None) - they might be high priority
683
+ incompatible_creds = []
684
+ compatible_creds = []
685
+ unknown_creds = []
686
+
687
+ for cred in credentials_for_provider:
688
+ if hasattr(provider_plugin, 'get_credential_priority'):
689
+ priority = provider_plugin.get_credential_priority(cred)
690
+ if priority is None:
691
+ # Unknown priority - keep it, will be discovered on first use
692
+ unknown_creds.append(cred)
693
+ elif priority <= required_tier:
694
+ # Known compatible priority
695
+ compatible_creds.append(cred)
696
+ else:
697
+ # Known incompatible priority (too low)
698
+ incompatible_creds.append(cred)
699
+ else:
700
+ # Provider doesn't support priorities - keep all
701
+ unknown_creds.append(cred)
702
+
703
+ # If we have any known-compatible or unknown credentials, use them
704
+ tier_compatible_creds = compatible_creds + unknown_creds
705
+ if tier_compatible_creds:
706
+ credentials_for_provider = tier_compatible_creds
707
+ if compatible_creds and unknown_creds:
708
+ lib_logger.info(
709
+ f"Model {model} requires priority <= {required_tier}. "
710
+ f"Using {len(compatible_creds)} known-compatible + {len(unknown_creds)} unknown-tier credentials."
711
+ )
712
+ elif compatible_creds:
713
+ lib_logger.info(
714
+ f"Model {model} requires priority <= {required_tier}. "
715
+ f"Using {len(compatible_creds)} known-compatible credentials."
716
+ )
717
+ else:
718
+ lib_logger.info(
719
+ f"Model {model} requires priority <= {required_tier}. "
720
+ f"Using {len(unknown_creds)} unknown-tier credentials (will discover on use)."
721
+ )
722
+ elif incompatible_creds:
723
+ # Only known-incompatible credentials remain
724
+ lib_logger.warning(
725
+ f"Model {model} requires priority <= {required_tier} credentials, "
726
+ f"but all {len(incompatible_creds)} known credentials have priority > {required_tier}. "
727
+ f"Request will likely fail."
728
+ )
729
+
730
+ # Build priority map for usage_manager
731
+ if provider_plugin and hasattr(provider_plugin, 'get_credential_priority'):
732
+ credential_priorities = {}
733
+ for cred in credentials_for_provider:
734
+ priority = provider_plugin.get_credential_priority(cred)
735
+ if priority is not None:
736
+ credential_priorities[cred] = priority
737
+
738
+ if credential_priorities:
739
+ lib_logger.debug(
740
+ f"Credential priorities for {provider}: {', '.join(f'P{p}={len([c for c in credentials_for_provider if credential_priorities.get(c)==p])}' for p in sorted(set(credential_priorities.values())))}"
741
+ )
742
 
743
  while (
744
  len(tried_creds) < len(credentials_for_provider) and time.time() < deadline
 
777
  max_concurrent = self.max_concurrent_requests_per_key.get(provider, 1)
778
  current_cred = await self.usage_manager.acquire_key(
779
  available_keys=creds_to_try, model=model, deadline=deadline,
780
+ max_concurrent=max_concurrent,
781
+ credential_priorities=credential_priorities
782
  )
783
  key_acquired = True
784
  tried_creds.add(current_cred)
 
1115
  lib_logger.info(f"Resolved model '{model}' to '{resolved_model}'")
1116
  model = resolved_model
1117
  kwargs["model"] = model # Ensure kwargs has the resolved model for litellm
1118
+
1119
+ # [NEW] Filter by model tier requirement and build priority map
1120
+ credential_priorities = None
1121
+ if provider_plugin and hasattr(provider_plugin, 'get_model_tier_requirement'):
1122
+ required_tier = provider_plugin.get_model_tier_requirement(model)
1123
+ if required_tier is not None:
1124
+ # Filter OUT only credentials we KNOW are too low priority
1125
+ # Keep credentials with unknown priority (None) - they might be high priority
1126
+ incompatible_creds = []
1127
+ compatible_creds = []
1128
+ unknown_creds = []
1129
+
1130
+ for cred in credentials_for_provider:
1131
+ if hasattr(provider_plugin, 'get_credential_priority'):
1132
+ priority = provider_plugin.get_credential_priority(cred)
1133
+ if priority is None:
1134
+ # Unknown priority - keep it, will be discovered on first use
1135
+ unknown_creds.append(cred)
1136
+ elif priority <= required_tier:
1137
+ # Known compatible priority
1138
+ compatible_creds.append(cred)
1139
+ else:
1140
+ # Known incompatible priority (too low)
1141
+ incompatible_creds.append(cred)
1142
+ else:
1143
+ # Provider doesn't support priorities - keep all
1144
+ unknown_creds.append(cred)
1145
+
1146
+ # If we have any known-compatible or unknown credentials, use them
1147
+ tier_compatible_creds = compatible_creds + unknown_creds
1148
+ if tier_compatible_creds:
1149
+ credentials_for_provider = tier_compatible_creds
1150
+ if compatible_creds and unknown_creds:
1151
+ lib_logger.info(
1152
+ f"Model {model} requires priority <= {required_tier}. "
1153
+ f"Using {len(compatible_creds)} known-compatible + {len(unknown_creds)} unknown-tier credentials."
1154
+ )
1155
+ elif compatible_creds:
1156
+ lib_logger.info(
1157
+ f"Model {model} requires priority <= {required_tier}. "
1158
+ f"Using {len(compatible_creds)} known-compatible credentials."
1159
+ )
1160
+ else:
1161
+ lib_logger.info(
1162
+ f"Model {model} requires priority <= {required_tier}. "
1163
+ f"Using {len(unknown_creds)} unknown-tier credentials (will discover on use)."
1164
+ )
1165
+ elif incompatible_creds:
1166
+ # Only known-incompatible credentials remain
1167
+ lib_logger.warning(
1168
+ f"Model {model} requires priority <= {required_tier} credentials, "
1169
+ f"but all {len(incompatible_creds)} known credentials have priority > {required_tier}. "
1170
+ f"Request will likely fail."
1171
+ )
1172
+
1173
+ # Build priority map for usage_manager
1174
+ if provider_plugin and hasattr(provider_plugin, 'get_credential_priority'):
1175
+ credential_priorities = {}
1176
+ for cred in credentials_for_provider:
1177
+ priority = provider_plugin.get_credential_priority(cred)
1178
+ if priority is not None:
1179
+ credential_priorities[cred] = priority
1180
+
1181
+ if credential_priorities:
1182
+ lib_logger.debug(
1183
+ f"Credential priorities for {provider}: {', '.join(f'P{p}={len([c for c in credentials_for_provider if credential_priorities.get(c)==p])}' for p in sorted(set(credential_priorities.values())))}"
1184
+ )
1185
 
1186
  try:
1187
  while (
 
1221
  max_concurrent = self.max_concurrent_requests_per_key.get(provider, 1)
1222
  current_cred = await self.usage_manager.acquire_key(
1223
  available_keys=creds_to_try, model=model, deadline=deadline,
1224
+ max_concurrent=max_concurrent,
1225
+ credential_priorities=credential_priorities
1226
  )
1227
  key_acquired = True
1228
  tried_creds.add(current_cred)
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -165,6 +165,59 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
165
  f"cache={self._enable_signature_cache}, gemini3_fix={self._enable_gemini3_tool_fix}"
166
  )
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  # =========================================================================
169
  # MODEL UTILITIES
170
  # =========================================================================
 
165
  f"cache={self._enable_signature_cache}, gemini3_fix={self._enable_gemini3_tool_fix}"
166
  )
167
 
168
+ # =========================================================================
169
+ # CREDENTIAL PRIORITIZATION
170
+ # =========================================================================
171
+
172
def get_credential_priority(self, credential: str) -> Optional[int]:
    """
    Return the rotation priority for a credential based on its Gemini tier.

    Lower numbers mean higher priority:
      - Paid tiers: priority 1 (highest)
      - Free tier: priority 2
      - Legacy or 'unknown' tiers: priority 10 (lowest)

    Args:
        credential: The credential path, used as the key into the
            project tier cache.

    Returns:
        Priority level (1-10), or None if the tier has not yet been
        discovered (it is learned on the credential's first use).
    """
    # Tier is discovered lazily; a missing cache entry means "not yet known".
    tier = self.project_tier_cache.get(credential)
    if not tier:
        return None  # Not yet discovered

    # Any tier other than free/legacy/unknown is treated as paid -> highest priority.
    if tier not in ['free-tier', 'legacy-tier', 'unknown']:
        return 1

    # Free tier gets lower priority.
    if tier == 'free-tier':
        return 2

    # Legacy and 'unknown' tiers get the lowest priority.
    return 10
199
+
200
def get_model_tier_requirement(self, model: str) -> Optional[int]:
    """
    Return the minimum credential priority required to serve a model.

    Gemini 3 models are restricted to paid-tier credentials (priority 1);
    every other model carries no restriction.

    Args:
        model: The model name, with or without a provider prefix and an
            optional ':thinking' suffix.

    Returns:
        Minimum required priority level, or None if unrestricted.
    """
    # Normalize: drop any 'provider/' prefix, then the ':thinking' suffix.
    model_name = model.split('/')[-1].replace(':thinking', '')

    # Gemini 3 requires paid tier.
    if model_name.startswith("gemini-3-"):
        return 1  # Only priority 1 (paid) credentials may serve it.

    return None  # All other models have no restrictions
218
+
219
+
220
+
221
  # =========================================================================
222
  # MODEL UTILITIES
223
  # =========================================================================
src/rotator_library/providers/provider_interface.py CHANGED
@@ -66,4 +66,49 @@ class ProviderInterface(ABC):
66
  """
67
  Proactively refreshes a token if it's nearing expiry.
68
  """
69
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  """
67
  Proactively refreshes a token if it's nearing expiry.
68
  """
69
+ pass
70
+
71
+ # [NEW] Credential Prioritization System
72
def get_credential_priority(self, credential: str) -> Optional[int]:
    """
    Return the priority level for a credential, or None.

    Lower numbers mean higher priority (1 is highest). Providers that can
    auto-detect credential tiers (e.g. paid vs. free accounts) override
    this so higher-tier credentials are always tried first; this base
    implementation opts out of prioritization entirely.

    Args:
        credential: The credential identifier (API key or path)

    Returns:
        Priority level (1-10) or None if no priority system

    Example:
        For Gemini CLI:
        - Paid tier credentials: priority 1 (highest)
        - Free tier credentials: priority 2
        - Unknown tier: priority 10 (lowest)
    """
    # Default: no priority system; callers fall back to plain
    # usage-count load balancing.
    return None
94
+
95
def get_model_tier_requirement(self, model: str) -> Optional[int]:
    """
    Return the minimum credential priority a model requires, or None.

    When a model requires priority 1, only credentials whose priority is
    <= 1 may use it. Providers override this to restrict certain models
    to specific credential tiers (for example, Gemini 3 models require
    paid-tier credentials); this base implementation imposes no
    restrictions.

    Args:
        model: The model name (with or without provider prefix)

    Returns:
        Minimum required priority level or None if no restrictions

    Example:
        For Gemini CLI:
        - gemini-3-*: requires priority 1 (paid tier only)
        - gemini-2.5-*: no restriction (None)
    """
    # Default: every credential may serve every model.
    return None
src/rotator_library/usage_manager.py CHANGED
@@ -162,11 +162,31 @@ class UsageManager:
162
 
163
  async def acquire_key(
164
  self, available_keys: List[str], model: str, deadline: float,
165
- max_concurrent: int = 1
 
166
  ) -> str:
167
  """
168
  Acquires the best available key using a tiered, model-aware locking strategy,
169
- respecting a global deadline.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  """
171
  await self._lazy_init()
172
  await self._reset_daily_stats_if_needed()
@@ -174,78 +194,180 @@ class UsageManager:
174
 
175
  # This loop continues as long as the global deadline has not been met.
176
  while time.time() < deadline:
177
- tier1_keys, tier2_keys = [], []
178
  now = time.time()
179
 
180
- # First, filter the list of available keys to exclude any on cooldown.
181
- async with self._data_lock:
182
- for key in available_keys:
183
- key_data = self._usage_data.get(key, {})
184
-
185
- if (key_data.get("key_cooldown_until") or 0) > now or (
186
- key_data.get("model_cooldowns", {}).get(model) or 0
187
- ) > now:
188
- continue
189
-
190
- # Prioritize keys based on their current usage to ensure load balancing.
191
- usage_count = (
192
- key_data.get("daily", {})
193
- .get("models", {})
194
- .get(model, {})
195
- .get("success_count", 0)
196
- )
197
- key_state = self.key_states[key]
198
-
199
- # Tier 1: Completely idle keys (preferred).
200
- if not key_state["models_in_use"]:
201
- tier1_keys.append((key, usage_count))
202
- # Tier 2: Keys that can accept more concurrent requests for this model.
203
- elif key_state["models_in_use"].get(model, 0) < max_concurrent:
204
- tier2_keys.append((key, usage_count))
205
-
206
- tier1_keys.sort(key=lambda x: x[1])
207
- tier2_keys.sort(key=lambda x: x[1])
208
-
209
- # Attempt to acquire a key from Tier 1 first.
210
- for key, _ in tier1_keys:
211
- state = self.key_states[key]
212
- async with state["lock"]:
213
- if not state["models_in_use"]:
214
- state["models_in_use"][model] = 1
215
- lib_logger.info(
216
- f"Acquired Tier 1 key ...{key[-6:]} for model {model}"
217
  )
218
- return key
219
-
220
- # If no Tier 1 keys are available, try Tier 2.
221
- for key, _ in tier2_keys:
222
- state = self.key_states[key]
223
- async with state["lock"]:
224
- current_count = state["models_in_use"].get(model, 0)
225
- if current_count < max_concurrent:
226
- state["models_in_use"][model] = current_count + 1
227
- lib_logger.info(
228
- f"Acquired Tier 2 key ...{key[-6:]} for model {model} "
229
- f"(concurrent: {state['models_in_use'][model]}/{max_concurrent})"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  )
231
- return key
232
-
233
- # If all eligible keys are locked, wait for a key to be released.
234
- lib_logger.info(
235
- "All eligible keys are currently locked for this model. Waiting..."
236
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
- all_potential_keys = tier1_keys + tier2_keys
239
- if not all_potential_keys:
240
- lib_logger.warning(
241
- "No keys are eligible (all on cooldown). Waiting before re-evaluating."
242
  )
243
- await asyncio.sleep(1)
244
- continue
245
 
246
- # Wait on the condition of the key with the lowest current usage.
247
- best_wait_key = min(all_potential_keys, key=lambda x: x[1])[0]
248
- wait_condition = self.key_states[best_wait_key]["condition"]
 
 
 
 
 
 
 
 
249
 
250
  try:
251
  async with wait_condition:
@@ -266,6 +388,8 @@ class UsageManager:
266
  f"Could not acquire a key for model {model} within the global time budget."
267
  )
268
 
 
 
269
  async def release_key(self, key: str, model: str):
270
  """Releases a key's lock for a specific model and notifies waiting tasks."""
271
  if key not in self.key_states:
 
162
 
163
  async def acquire_key(
164
  self, available_keys: List[str], model: str, deadline: float,
165
+ max_concurrent: int = 1,
166
+ credential_priorities: Optional[Dict[str, int]] = None
167
  ) -> str:
168
  """
169
  Acquires the best available key using a tiered, model-aware locking strategy,
170
+ respecting a global deadline and credential priorities.
171
+
172
+ Priority Logic:
173
+ - Groups credentials by priority level (1=highest, 2=lower, etc.)
174
+ - Always tries highest priority (lowest number) first
175
+ - Within same priority, sorts by usage count (load balancing)
176
+ - Only moves to next priority if all higher-priority keys exhausted/busy
177
+
178
+ Args:
179
+ available_keys: List of credential identifiers to choose from
180
+ model: Model name being requested
181
+ deadline: Timestamp after which to stop trying
182
+ max_concurrent: Maximum concurrent requests allowed per credential
183
+ credential_priorities: Optional dict mapping credentials to priority levels (1=highest)
184
+
185
+ Returns:
186
+ Selected credential identifier
187
+
188
+ Raises:
189
+ NoAvailableKeysError: If no key could be acquired within the deadline
190
  """
191
  await self._lazy_init()
192
  await self._reset_daily_stats_if_needed()
 
194
 
195
  # This loop continues as long as the global deadline has not been met.
196
  while time.time() < deadline:
 
197
  now = time.time()
198
 
199
+ # Group credentials by priority level (if priorities provided)
200
+ if credential_priorities:
201
+ # Group keys by priority level
202
+ priority_groups = {}
203
+ async with self._data_lock:
204
+ for key in available_keys:
205
+ key_data = self._usage_data.get(key, {})
206
+
207
+ # Skip keys on cooldown
208
+ if (key_data.get("key_cooldown_until") or 0) > now or (
209
+ key_data.get("model_cooldowns", {}).get(model) or 0
210
+ ) > now:
211
+ continue
212
+
213
+ # Get priority for this key (default to 999 if not specified)
214
+ priority = credential_priorities.get(key, 999)
215
+
216
+ # Get usage count for load balancing within priority groups
217
+ usage_count = (
218
+ key_data.get("daily", {})
219
+ .get("models", {})
220
+ .get(model, {})
221
+ .get("success_count", 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  )
223
+
224
+ # Group by priority
225
+ if priority not in priority_groups:
226
+ priority_groups[priority] = []
227
+ priority_groups[priority].append((key, usage_count))
228
+
229
+ # Try priority groups in order (1, 2, 3, ...)
230
+ sorted_priorities = sorted(priority_groups.keys())
231
+
232
+ for priority_level in sorted_priorities:
233
+ keys_in_priority = priority_groups[priority_level]
234
+
235
+ # Within each priority group, use existing tier1/tier2 logic
236
+ tier1_keys, tier2_keys = [], []
237
+ for key, usage_count in keys_in_priority:
238
+ key_state = self.key_states[key]
239
+
240
+ # Tier 1: Completely idle keys (preferred)
241
+ if not key_state["models_in_use"]:
242
+ tier1_keys.append((key, usage_count))
243
+ # Tier 2: Keys that can accept more concurrent requests
244
+ elif key_state["models_in_use"].get(model, 0) < max_concurrent:
245
+ tier2_keys.append((key, usage_count))
246
+
247
+ # Sort by usage within each tier
248
+ tier1_keys.sort(key=lambda x: x[1])
249
+ tier2_keys.sort(key=lambda x: x[1])
250
+
251
+ # Try to acquire from Tier 1 first
252
+ for key, usage in tier1_keys:
253
+ state = self.key_states[key]
254
+ async with state["lock"]:
255
+ if not state["models_in_use"]:
256
+ state["models_in_use"][model] = 1
257
+ lib_logger.info(
258
+ f"Acquired Priority-{priority_level} Tier-1 key ...{key[-6:]} for model {model} (usage: {usage})"
259
+ )
260
+ return key
261
+
262
+ # Then try Tier 2
263
+ for key, usage in tier2_keys:
264
+ state = self.key_states[key]
265
+ async with state["lock"]:
266
+ current_count = state["models_in_use"].get(model, 0)
267
+ if current_count < max_concurrent:
268
+ state["models_in_use"][model] = current_count + 1
269
+ lib_logger.info(
270
+ f"Acquired Priority-{priority_level} Tier-2 key ...{key[-6:]} for model {model} "
271
+ f"(concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
272
+ )
273
+ return key
274
+
275
+ # If we get here, all priority groups were exhausted but keys might become available
276
+ # Collect all keys across all priorities for waiting
277
+ all_potential_keys = []
278
+ for keys_list in priority_groups.values():
279
+ all_potential_keys.extend(keys_list)
280
+
281
+ if not all_potential_keys:
282
+ lib_logger.warning(
283
+ "No keys are eligible (all on cooldown or filtered out). Waiting before re-evaluating."
284
+ )
285
+ await asyncio.sleep(1)
286
+ continue
287
+
288
+ # Wait for the highest priority key with lowest usage
289
+ best_priority = min(priority_groups.keys())
290
+ best_priority_keys = priority_groups[best_priority]
291
+ best_wait_key = min(best_priority_keys, key=lambda x: x[1])[0]
292
+ wait_condition = self.key_states[best_wait_key]["condition"]
293
+
294
+ lib_logger.info(
295
+ f"All Priority-{best_priority} keys are busy. Waiting for highest priority credential to become available..."
296
+ )
297
+
298
+ else:
299
+ # Original logic when no priorities specified
300
+ tier1_keys, tier2_keys = [], []
301
+
302
+ # First, filter the list of available keys to exclude any on cooldown.
303
+ async with self._data_lock:
304
+ for key in available_keys:
305
+ key_data = self._usage_data.get(key, {})
306
+
307
+ if (key_data.get("key_cooldown_until") or 0) > now or (
308
+ key_data.get("model_cooldowns", {}).get(model) or 0
309
+ ) > now:
310
+ continue
311
+
312
+ # Prioritize keys based on their current usage to ensure load balancing.
313
+ usage_count = (
314
+ key_data.get("daily", {})
315
+ .get("models", {})
316
+ .get(model, {})
317
+ .get("success_count", 0)
318
  )
319
+ key_state = self.key_states[key]
320
+
321
+ # Tier 1: Completely idle keys (preferred).
322
+ if not key_state["models_in_use"]:
323
+ tier1_keys.append((key, usage_count))
324
+ # Tier 2: Keys that can accept more concurrent requests for this model.
325
+ elif key_state["models_in_use"].get(model, 0) < max_concurrent:
326
+ tier2_keys.append((key, usage_count))
327
+
328
+ tier1_keys.sort(key=lambda x: x[1])
329
+ tier2_keys.sort(key=lambda x: x[1])
330
+
331
+ # Attempt to acquire a key from Tier 1 first.
332
+ for key, _ in tier1_keys:
333
+ state = self.key_states[key]
334
+ async with state["lock"]:
335
+ if not state["models_in_use"]:
336
+ state["models_in_use"][model] = 1
337
+ lib_logger.info(
338
+ f"Acquired Tier 1 key ...{key[-6:]} for model {model}"
339
+ )
340
+ return key
341
+
342
+ # If no Tier 1 keys are available, try Tier 2.
343
+ for key, _ in tier2_keys:
344
+ state = self.key_states[key]
345
+ async with state["lock"]:
346
+ current_count = state["models_in_use"].get(model, 0)
347
+ if current_count < max_concurrent:
348
+ state["models_in_use"][model] = current_count + 1
349
+ lib_logger.info(
350
+ f"Acquired Tier 2 key ...{key[-6:]} for model {model} "
351
+ f"(concurrent: {state['models_in_use'][model]}/{max_concurrent})"
352
+ )
353
+ return key
354
 
355
+ # If all eligible keys are locked, wait for a key to be released.
356
+ lib_logger.info(
357
+ "All eligible keys are currently locked for this model. Waiting..."
 
358
  )
 
 
359
 
360
+ all_potential_keys = tier1_keys + tier2_keys
361
+ if not all_potential_keys:
362
+ lib_logger.warning(
363
+ "No keys are eligible (all on cooldown). Waiting before re-evaluating."
364
+ )
365
+ await asyncio.sleep(1)
366
+ continue
367
+
368
+ # Wait on the condition of the key with the lowest current usage.
369
+ best_wait_key = min(all_potential_keys, key=lambda x: x[1])[0]
370
+ wait_condition = self.key_states[best_wait_key]["condition"]
371
 
372
  try:
373
  async with wait_condition:
 
388
  f"Could not acquire a key for model {model} within the global time budget."
389
  )
390
 
391
+
392
+
393
  async def release_key(self, key: str, model: str):
394
  """Releases a key's lock for a specific model and notifies waiting tasks."""
395
  if key not in self.key_states: