Mirrowel committed on
Commit
aefb706
·
1 Parent(s): 136eb6c

feat(concurrency): ✨ add priority-based concurrency multipliers for credential tiers

Browse files

This commit introduces a flexible priority-based concurrency multiplier system that allows higher-priority credentials (e.g., paid tiers) to handle more concurrent requests than lower-priority credentials, regardless of rotation mode.

Key changes:
- Added `default_priority_multipliers` and `default_sequential_fallback_multiplier` to `ProviderInterface` for provider-level configuration
- Implemented multiplier lookup with mode-specific overrides via environment variables (format: `CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>[_<MODE>]=<multiplier>`)
- Modified `UsageManager` to calculate effective concurrency limits by applying multipliers to base `MAX_CONCURRENT_REQUESTS_PER_KEY` values
- Added `PriorityMultiplierManager` to `settings_tool.py` for runtime configuration and display of multipliers
- Configured default multipliers for Antigravity (P1: 5x, P2: 3x, sequential fallback: 2x) and Gemini CLI (P1: 5x, P2: 3x)
- Introduced `model_usage_weights` to account for models with different quota consumption rates (e.g., Opus counts 2x vs Sonnet)
- Implemented `_get_grouped_usage_count()` for weighted usage calculation across quota groups
- Refactored `_sort_sequential()` to return sorted lists instead of single selection, allowing multipliers to enable multiple concurrent requests in sequential mode
- Enhanced logging to display effective concurrency limits and priority tiers during credential acquisition
- Added comprehensive documentation in `.env.example` explaining the multiplier system and configuration options

The multiplier system preserves existing rotation behavior while allowing paid credentials to maximize throughput. In sequential mode, multipliers enable controlled concurrency while maintaining cache-preserving stickiness. In balanced mode, multipliers provide fair load distribution with tier-appropriate capacity.

.env.example CHANGED
@@ -185,6 +185,37 @@ MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
185
  # ROTATION_MODE_GEMINI=balanced
186
  # ROTATION_MODE_ANTIGRAVITY=sequential
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  # --- Model Quota Groups ---
189
  # Models that share quota/cooldown timing. When one model in a group hits
190
  # quota exhausted (429), all models in the group receive the same cooldown timestamp.
 
185
  # ROTATION_MODE_GEMINI=balanced
186
  # ROTATION_MODE_ANTIGRAVITY=sequential
187
 
188
+ # --- Priority-Based Concurrency Multipliers ---
189
+ # Credentials can be assigned to priority tiers (1=highest, 2, 3, etc.).
190
+ # Each tier can have a concurrency multiplier that increases the effective
191
+ # concurrent request limit for credentials in that tier.
192
+ #
193
+ # How it works:
194
+ # effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY * tier_multiplier
195
+ #
196
+ # This allows paid/premium credentials to handle more concurrent requests than
197
+ # free tier credentials, regardless of rotation mode.
198
+ #
199
+ # Provider Defaults (built into provider classes):
200
+ # Antigravity:
201
+ # Priority 1: 5x (paid ultra tier)
202
+ # Priority 2: 3x (standard paid tier)
203
+ # Priority 3+: 2x (sequential mode) or 1x (balanced mode)
204
+ # Gemini CLI:
205
+ # Priority 1: 5x
206
+ # Priority 2: 3x
207
+ # Others: 1x (all modes)
208
+ #
209
+ # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
210
+ #
211
+ # Mode-specific overrides (optional):
212
+ # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
213
+ #
214
+ # Examples:
215
+ # CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10 # Override P1 to 10x
216
+ # CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1 # Override P3 to 1x
217
+ # CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1 # P2 = 1x in balanced mode only
218
+
219
  # --- Model Quota Groups ---
220
  # Models that share quota/cooldown timing. When one model in a group hits
221
  # quota exhausted (429), all models in the group receive the same cooldown timestamp.
src/proxy_app/settings_tool.py CHANGED
@@ -234,6 +234,94 @@ class RotationModeManager:
234
  self.settings.remove(key)
235
 
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  # =============================================================================
238
  # PROVIDER-SPECIFIC SETTINGS DEFINITIONS
239
  # =============================================================================
@@ -424,6 +512,7 @@ class SettingsTool:
424
  self.model_mgr = ModelDefinitionManager(self.settings)
425
  self.concurrency_mgr = ConcurrencyManager(self.settings)
426
  self.rotation_mgr = RotationModeManager(self.settings)
 
427
  self.provider_settings_mgr = ProviderSettingsManager(self.settings)
428
  self.running = True
429
 
@@ -1268,14 +1357,15 @@ class SettingsTool:
1268
  self.console.print()
1269
  self.console.print(" 1. ➕ Set Rotation Mode for Provider")
1270
  self.console.print(" 2. 🗑️ Reset to Provider Default")
1271
- self.console.print(" 3. ↩️ Back to Settings Menu")
 
1272
 
1273
  self.console.print()
1274
  self.console.print("━" * 70)
1275
  self.console.print()
1276
 
1277
  choice = Prompt.ask(
1278
- "Select option", choices=["1", "2", "3"], show_choices=False
1279
  )
1280
 
1281
  if choice == "1":
@@ -1368,8 +1458,170 @@ class SettingsTool:
1368
  input("\nPress Enter to continue...")
1369
 
1370
  elif choice == "3":
 
 
 
1371
  break
1372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1373
  def manage_concurrency_limits(self):
1374
  """Manage concurrency limits"""
1375
  while True:
 
234
  self.settings.remove(key)
235
 
236
 
237
+ class PriorityMultiplierManager:
238
+ """Manages CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N> settings"""
239
+
240
+ def __init__(self, settings: AdvancedSettings):
241
+ self.settings = settings
242
+
243
+ def get_provider_defaults(self, provider: str) -> Dict[int, int]:
244
+ """Get default priority multipliers from provider class"""
245
+ try:
246
+ from rotator_library.providers import PROVIDER_PLUGINS
247
+
248
+ provider_class = PROVIDER_PLUGINS.get(provider.lower())
249
+ if provider_class and hasattr(
250
+ provider_class, "default_priority_multipliers"
251
+ ):
252
+ return dict(provider_class.default_priority_multipliers)
253
+ except ImportError:
254
+ pass
255
+ return {}
256
+
257
+ def get_sequential_fallback(self, provider: str) -> int:
258
+ """Get sequential fallback multiplier from provider class"""
259
+ try:
260
+ from rotator_library.providers import PROVIDER_PLUGINS
261
+
262
+ provider_class = PROVIDER_PLUGINS.get(provider.lower())
263
+ if provider_class and hasattr(
264
+ provider_class, "default_sequential_fallback_multiplier"
265
+ ):
266
+ return provider_class.default_sequential_fallback_multiplier
267
+ except ImportError:
268
+ pass
269
+ return 1
270
+
271
+ def get_current_multipliers(self) -> Dict[str, Dict[int, int]]:
272
+ """Get currently configured priority multipliers from env vars"""
273
+ multipliers: Dict[str, Dict[int, int]] = {}
274
+ for key, value in os.environ.items():
275
+ if key.startswith("CONCURRENCY_MULTIPLIER_") and "_PRIORITY_" in key:
276
+ try:
277
+ # Parse: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>
278
+ parts = key.split("_PRIORITY_")
279
+ provider = parts[0].replace("CONCURRENCY_MULTIPLIER_", "").lower()
280
+ remainder = parts[1]
281
+
282
+ # Check if mode-specific (has _SEQUENTIAL or _BALANCED suffix)
283
+ if "_" in remainder:
284
+ continue # Skip mode-specific for now (show in separate view)
285
+
286
+ priority = int(remainder)
287
+ multiplier = int(value)
288
+
289
+ if provider not in multipliers:
290
+ multipliers[provider] = {}
291
+ multipliers[provider][priority] = multiplier
292
+ except (ValueError, IndexError):
293
+ pass
294
+ return multipliers
295
+
296
+ def get_effective_multiplier(self, provider: str, priority: int) -> int:
297
+ """Get effective multiplier (configured, provider default, or 1)"""
298
+ # Check env var override
299
+ current = self.get_current_multipliers()
300
+ if provider.lower() in current:
301
+ if priority in current[provider.lower()]:
302
+ return current[provider.lower()][priority]
303
+
304
+ # Check provider defaults
305
+ defaults = self.get_provider_defaults(provider)
306
+ if priority in defaults:
307
+ return defaults[priority]
308
+
309
+ # Return 1 (no multiplier)
310
+ return 1
311
+
312
+ def set_multiplier(self, provider: str, priority: int, multiplier: int):
313
+ """Set priority multiplier for a provider"""
314
+ if multiplier < 1:
315
+ raise ValueError("Multiplier must be >= 1")
316
+ key = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_{priority}"
317
+ self.settings.set(key, str(multiplier))
318
+
319
+ def remove_multiplier(self, provider: str, priority: int):
320
+ """Remove multiplier (reset to provider default)"""
321
+ key = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_{priority}"
322
+ self.settings.remove(key)
323
+
324
+
325
  # =============================================================================
326
  # PROVIDER-SPECIFIC SETTINGS DEFINITIONS
327
  # =============================================================================
 
512
  self.model_mgr = ModelDefinitionManager(self.settings)
513
  self.concurrency_mgr = ConcurrencyManager(self.settings)
514
  self.rotation_mgr = RotationModeManager(self.settings)
515
+ self.priority_multiplier_mgr = PriorityMultiplierManager(self.settings)
516
  self.provider_settings_mgr = ProviderSettingsManager(self.settings)
517
  self.running = True
518
 
 
1357
  self.console.print()
1358
  self.console.print(" 1. ➕ Set Rotation Mode for Provider")
1359
  self.console.print(" 2. 🗑️ Reset to Provider Default")
1360
+ self.console.print(" 3. Configure Priority Concurrency Multipliers")
1361
+ self.console.print(" 4. ↩️ Back to Settings Menu")
1362
 
1363
  self.console.print()
1364
  self.console.print("━" * 70)
1365
  self.console.print()
1366
 
1367
  choice = Prompt.ask(
1368
+ "Select option", choices=["1", "2", "3", "4"], show_choices=False
1369
  )
1370
 
1371
  if choice == "1":
 
1458
  input("\nPress Enter to continue...")
1459
 
1460
  elif choice == "3":
1461
+ self.manage_priority_multipliers()
1462
+
1463
+ elif choice == "4":
1464
  break
1465
 
1466
+ def manage_priority_multipliers(self):
1467
+ """Manage priority-based concurrency multipliers per provider"""
1468
+ clear_screen()
1469
+
1470
+ current_multipliers = self.priority_multiplier_mgr.get_current_multipliers()
1471
+ available_providers = self.get_available_providers()
1472
+
1473
+ self.console.print(
1474
+ Panel.fit(
1475
+ "[bold cyan]⚡ Priority Concurrency Multipliers[/bold cyan]",
1476
+ border_style="cyan",
1477
+ )
1478
+ )
1479
+
1480
+ self.console.print()
1481
+ self.console.print("[bold]📋 Current Priority Multiplier Settings[/bold]")
1482
+ self.console.print("━" * 70)
1483
+
1484
+ # Show all providers with their priority multipliers
1485
+ has_settings = False
1486
+ for provider in available_providers:
1487
+ defaults = self.priority_multiplier_mgr.get_provider_defaults(provider)
1488
+ overrides = current_multipliers.get(provider, {})
1489
+ seq_fallback = self.priority_multiplier_mgr.get_sequential_fallback(
1490
+ provider
1491
+ )
1492
+ rotation_mode = self.rotation_mgr.get_effective_mode(provider)
1493
+
1494
+ if defaults or overrides or seq_fallback != 1:
1495
+ has_settings = True
1496
+ self.console.print(
1497
+ f"\n [bold]{provider}[/bold] ({rotation_mode} mode)"
1498
+ )
1499
+
1500
+ # Combine and display priorities
1501
+ all_priorities = set(defaults.keys()) | set(overrides.keys())
1502
+ for priority in sorted(all_priorities):
1503
+ default_val = defaults.get(priority, 1)
1504
+ override_val = overrides.get(priority)
1505
+
1506
+ if override_val is not None:
1507
+ self.console.print(
1508
+ f" Priority {priority}: [cyan]{override_val}x[/cyan] (override, default: {default_val}x)"
1509
+ )
1510
+ else:
1511
+ self.console.print(
1512
+ f" Priority {priority}: {default_val}x [dim](default)[/dim]"
1513
+ )
1514
+
1515
+ # Show sequential fallback if applicable
1516
+ if rotation_mode == "sequential" and seq_fallback != 1:
1517
+ self.console.print(
1518
+ f" Others (seq): {seq_fallback}x [dim](fallback)[/dim]"
1519
+ )
1520
+
1521
+ if not has_settings:
1522
+ self.console.print(" [dim]No priority multipliers configured[/dim]")
1523
+
1524
+ self.console.print()
1525
+ self.console.print("[bold]ℹ️ About Priority Multipliers:[/bold]")
1526
+ self.console.print(
1527
+ " Higher priority tiers (lower numbers) can have higher multipliers."
1528
+ )
1529
+ self.console.print(" Example: Priority 1 = 5x, Priority 2 = 3x, Others = 1x")
1530
+ self.console.print()
1531
+ self.console.print("━" * 70)
1532
+ self.console.print()
1533
+ self.console.print(" 1. ✏️ Set Priority Multiplier")
1534
+ self.console.print(" 2. 🔄 Reset to Provider Default")
1535
+ self.console.print(" 3. ↩️ Back")
1536
+
1537
+ choice = Prompt.ask(
1538
+ "Select option", choices=["1", "2", "3"], show_choices=False
1539
+ )
1540
+
1541
+ if choice == "1":
1542
+ if not available_providers:
1543
+ self.console.print("\n[yellow]No providers available[/yellow]")
1544
+ input("\nPress Enter to continue...")
1545
+ return
1546
+
1547
+ # Select provider
1548
+ self.console.print("\n[bold]Select provider:[/bold]")
1549
+ for idx, prov in enumerate(available_providers, 1):
1550
+ self.console.print(f" {idx}. {prov}")
1551
+
1552
+ prov_idx = IntPrompt.ask(
1553
+ "Provider",
1554
+ choices=[str(i) for i in range(1, len(available_providers) + 1)],
1555
+ )
1556
+ provider = available_providers[prov_idx - 1]
1557
+
1558
+ # Get priority level
1559
+ priority = IntPrompt.ask("Priority level (e.g., 1, 2, 3)")
1560
+
1561
+ # Get current value
1562
+ current = self.priority_multiplier_mgr.get_effective_multiplier(
1563
+ provider, priority
1564
+ )
1565
+ self.console.print(
1566
+ f"\nCurrent multiplier for priority {priority}: {current}x"
1567
+ )
1568
+
1569
+ multiplier = IntPrompt.ask("New multiplier (1-10)", default=current)
1570
+ if 1 <= multiplier <= 10:
1571
+ self.priority_multiplier_mgr.set_multiplier(
1572
+ provider, priority, multiplier
1573
+ )
1574
+ self.console.print(
1575
+ f"\n[green]✅ Priority {priority} multiplier for '{provider}' set to {multiplier}x[/green]"
1576
+ )
1577
+ else:
1578
+ self.console.print(
1579
+ "\n[yellow]Multiplier must be between 1 and 10[/yellow]"
1580
+ )
1581
+ input("\nPress Enter to continue...")
1582
+
1583
+ elif choice == "2":
1584
+ # Find providers with overrides
1585
+ providers_with_overrides = [
1586
+ p for p in available_providers if p in current_multipliers
1587
+ ]
1588
+ if not providers_with_overrides:
1589
+ self.console.print("\n[yellow]No custom multipliers to reset[/yellow]")
1590
+ input("\nPress Enter to continue...")
1591
+ return
1592
+
1593
+ self.console.print("\n[bold]Select provider to reset:[/bold]")
1594
+ for idx, prov in enumerate(providers_with_overrides, 1):
1595
+ self.console.print(f" {idx}. {prov}")
1596
+
1597
+ prov_idx = IntPrompt.ask(
1598
+ "Provider",
1599
+ choices=[str(i) for i in range(1, len(providers_with_overrides) + 1)],
1600
+ )
1601
+ provider = providers_with_overrides[prov_idx - 1]
1602
+
1603
+ # Get priority to reset
1604
+ overrides = current_multipliers.get(provider, {})
1605
+ if len(overrides) == 1:
1606
+ priority = list(overrides.keys())[0]
1607
+ else:
1608
+ self.console.print(f"\nOverrides for {provider}: {overrides}")
1609
+ priority = IntPrompt.ask("Priority level to reset")
1610
+
1611
+ if priority in overrides:
1612
+ self.priority_multiplier_mgr.remove_multiplier(provider, priority)
1613
+ default = self.priority_multiplier_mgr.get_effective_multiplier(
1614
+ provider, priority
1615
+ )
1616
+ self.console.print(
1617
+ f"\n[green]✅ Reset priority {priority} for '{provider}' to default ({default}x)[/green]"
1618
+ )
1619
+ else:
1620
+ self.console.print(
1621
+ f"\n[yellow]No override for priority {priority}[/yellow]"
1622
+ )
1623
+ input("\nPress Enter to continue...")
1624
+
1625
  def manage_concurrency_limits(self):
1626
  """Manage concurrency limits"""
1627
  while True:
src/rotator_library/client.py CHANGED
@@ -161,11 +161,95 @@ class RotatingClient:
161
  if mode != "balanced":
162
  lib_logger.info(f"Provider '{provider}' using rotation mode: {mode}")
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  self.usage_manager = UsageManager(
165
  file_path=usage_file_path,
166
  rotation_tolerance=rotation_tolerance,
167
  provider_rotation_modes=provider_rotation_modes,
168
  provider_plugins=PROVIDER_PLUGINS,
 
 
 
169
  )
170
  self._model_list_cache = {}
171
  self.http_client = httpx.AsyncClient()
 
161
  if mode != "balanced":
162
  lib_logger.info(f"Provider '{provider}' using rotation mode: {mode}")
163
 
164
+ # Build priority-based concurrency multiplier maps
165
+ # These are universal multipliers based on credential tier/priority
166
+ priority_multipliers: Dict[str, Dict[int, int]] = {}
167
+ priority_multipliers_by_mode: Dict[str, Dict[str, Dict[int, int]]] = {}
168
+ sequential_fallback_multipliers: Dict[str, int] = {}
169
+
170
+ for provider in self.all_credentials.keys():
171
+ provider_class = self._provider_plugins.get(provider)
172
+
173
+ # Start with provider class defaults
174
+ if provider_class:
175
+ # Get default priority multipliers from provider class
176
+ if hasattr(provider_class, "default_priority_multipliers"):
177
+ default_multipliers = provider_class.default_priority_multipliers
178
+ if default_multipliers:
179
+ priority_multipliers[provider] = dict(default_multipliers)
180
+
181
+ # Get sequential fallback from provider class
182
+ if hasattr(provider_class, "default_sequential_fallback_multiplier"):
183
+ fallback = provider_class.default_sequential_fallback_multiplier
184
+ if fallback != 1: # Only store if different from global default
185
+ sequential_fallback_multipliers[provider] = fallback
186
+
187
+ # Override with environment variables
188
+ # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
189
+ # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
190
+ for key, value in os.environ.items():
191
+ prefix = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_"
192
+ if key.startswith(prefix):
193
+ remainder = key[len(prefix) :]
194
+ try:
195
+ multiplier = int(value)
196
+ if multiplier < 1:
197
+ lib_logger.warning(f"Invalid {key}: {value}. Must be >= 1.")
198
+ continue
199
+
200
+ # Check if mode-specific (e.g., _PRIORITY_1_SEQUENTIAL)
201
+ if "_" in remainder:
202
+ parts = remainder.rsplit("_", 1)
203
+ priority = int(parts[0])
204
+ mode = parts[1].lower()
205
+ if mode in ("sequential", "balanced"):
206
+ # Mode-specific override
207
+ if provider not in priority_multipliers_by_mode:
208
+ priority_multipliers_by_mode[provider] = {}
209
+ if mode not in priority_multipliers_by_mode[provider]:
210
+ priority_multipliers_by_mode[provider][mode] = {}
211
+ priority_multipliers_by_mode[provider][mode][
212
+ priority
213
+ ] = multiplier
214
+ lib_logger.info(
215
+ f"Provider '{provider}' priority {priority} ({mode} mode) multiplier: {multiplier}x"
216
+ )
217
+ else:
218
+ # Assume it's part of the priority number (unlikely but handle gracefully)
219
+ lib_logger.warning(f"Unknown mode in {key}: {mode}")
220
+ else:
221
+ # Universal priority multiplier
222
+ priority = int(remainder)
223
+ if provider not in priority_multipliers:
224
+ priority_multipliers[provider] = {}
225
+ priority_multipliers[provider][priority] = multiplier
226
+ lib_logger.info(
227
+ f"Provider '{provider}' priority {priority} multiplier: {multiplier}x"
228
+ )
229
+ except ValueError:
230
+ lib_logger.warning(
231
+ f"Invalid {key}: {value}. Could not parse priority or multiplier."
232
+ )
233
+
234
+ # Log configured multipliers
235
+ for provider, multipliers in priority_multipliers.items():
236
+ if multipliers:
237
+ lib_logger.info(
238
+ f"Provider '{provider}' priority multipliers: {multipliers}"
239
+ )
240
+ for provider, fallback in sequential_fallback_multipliers.items():
241
+ lib_logger.info(
242
+ f"Provider '{provider}' sequential fallback multiplier: {fallback}x"
243
+ )
244
+
245
  self.usage_manager = UsageManager(
246
  file_path=usage_file_path,
247
  rotation_tolerance=rotation_tolerance,
248
  provider_rotation_modes=provider_rotation_modes,
249
  provider_plugins=PROVIDER_PLUGINS,
250
+ priority_multipliers=priority_multipliers,
251
+ priority_multipliers_by_mode=priority_multipliers_by_mode,
252
+ sequential_fallback_multipliers=sequential_fallback_multipliers,
253
  )
254
  self._model_list_cache = {}
255
  self.http_client = httpx.AsyncClient()
src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -539,10 +539,29 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
539
  }
540
 
541
  # Model quota groups (can be overridden via QUOTA_GROUPS_ANTIGRAVITY_CLAUDE)
 
542
  model_quota_groups: QuotaGroupMap = {
543
- # "claude": ["claude-sonnet-4-5", "claude-opus-4-5"], - commented out for later use if needed
544
  }
545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
  @staticmethod
547
  def parse_quota_error(
548
  error: Exception, error_body: Optional[str] = None
 
539
  }
540
 
541
  # Model quota groups (can be overridden via QUOTA_GROUPS_ANTIGRAVITY_CLAUDE)
542
+ # Models in the same group share quota - when one is exhausted, all are
543
  model_quota_groups: QuotaGroupMap = {
544
+ #"claude": ["claude-sonnet-4-5", "claude-opus-4-5"], - commented out for later use if needed
545
  }
546
 
547
+ # Model usage weights for grouped usage calculation
548
+ # Opus consumes more quota per request, so its usage counts 2x when
549
+ # comparing credentials for selection
550
+ model_usage_weights = {
551
+ "claude-opus-4-5": 2,
552
+ }
553
+
554
+ # Priority-based concurrency multipliers
555
+ # Higher priority credentials (lower number) get higher multipliers
556
+ # Priority 1 (paid ultra): 5x concurrent requests
557
+ # Priority 2 (standard paid): 3x concurrent requests
558
+ # Others: Use sequential fallback (2x) or balanced default (1x)
559
+ default_priority_multipliers = {1: 5, 2: 3}
560
+
561
+ # For sequential mode, lower priority tiers still get 2x to maintain stickiness
562
+ # For balanced mode, this doesn't apply (falls back to 1x)
563
+ default_sequential_fallback_multiplier = 2
564
+
565
  @staticmethod
566
  def parse_quota_error(
567
  error: Exception, error_body: Optional[str] = None
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -219,6 +219,16 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
219
  # No quota groups defined for Gemini CLI
220
  # (Models don't share quotas)
221
 
 
 
 
 
 
 
 
 
 
 
222
  @staticmethod
223
  def parse_quota_error(
224
  error: Exception, error_body: Optional[str] = None
 
219
  # No quota groups defined for Gemini CLI
220
  # (Models don't share quotas)
221
 
222
+ # Priority-based concurrency multipliers
223
+ # Same structure as Antigravity (by coincidence, tiers share naming)
224
+ # Priority 1 (paid ultra): 5x concurrent requests
225
+ # Priority 2 (standard paid): 3x concurrent requests
226
+ # Others: 1x (no sequential fallback, uses global default)
227
+ default_priority_multipliers = {1: 5, 2: 3}
228
+
229
+ # No sequential fallback for Gemini CLI (uses balanced mode default)
230
+ # default_sequential_fallback_multiplier = 1 (inherited from ProviderInterface)
231
+
232
  @staticmethod
233
  def parse_quota_error(
234
  error: Exception, error_body: Optional[str] = None
src/rotator_library/providers/provider_interface.py CHANGED
@@ -88,6 +88,30 @@ class ProviderInterface(ABC):
88
  # Can be overridden via env: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
89
  model_quota_groups: QuotaGroupMap = {}
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  @abstractmethod
92
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
93
  """
@@ -505,3 +529,20 @@ class ProviderInterface(ABC):
505
  Empty list if group doesn't exist.
506
  """
507
  return self._get_quota_group_models(group)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  # Can be overridden via env: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
89
  model_quota_groups: QuotaGroupMap = {}
90
 
91
+ # Model usage weights for grouped usage calculation
92
+ # When calculating combined usage for quota groups, each model's usage
93
+ # is multiplied by its weight. This accounts for models that consume
94
+ # more quota per request (e.g., Opus uses more than Sonnet).
95
+ # Models not in the map default to weight 1.
96
+ # Example: {"claude-opus-4-5": 2} means Opus usage counts 2x
97
+ model_usage_weights: Dict[str, int] = {}
98
+
99
+ # =========================================================================
100
+ # PRIORITY CONCURRENCY MULTIPLIERS - Override in subclass
101
+ # =========================================================================
102
+
103
+ # Priority-based concurrency multipliers (universal, applies to all modes)
104
+ # Maps priority level -> multiplier
105
+ # Higher priority credentials (lower number) can have higher multipliers
106
+ # to allow more concurrent requests
107
+ # Example: {1: 5, 2: 3} means Priority 1 gets 5x, Priority 2 gets 3x
108
+ default_priority_multipliers: Dict[int, int] = {}
109
+
110
+ # Fallback multiplier for sequential mode when priority not in default_priority_multipliers
111
+ # This is used for lower-priority tiers in sequential mode to maintain some stickiness
112
+ # Default: 1 (no multiplier effect)
113
+ default_sequential_fallback_multiplier: int = 1
114
+
115
  @abstractmethod
116
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
117
  """
 
529
  Empty list if group doesn't exist.
530
  """
531
  return self._get_quota_group_models(group)
532
+
533
+ def get_model_usage_weight(self, model: str) -> int:
534
+ """
535
+ Returns the usage weight for a model when calculating grouped usage.
536
+
537
+ Models with higher weights contribute more to the combined group usage.
538
+ This accounts for models that consume more quota per request.
539
+
540
+ Args:
541
+ model: Model name (with or without provider prefix)
542
+
543
+ Returns:
544
+ Weight multiplier (default 1 if not configured)
545
+ """
546
+ # Strip provider prefix if present
547
+ clean_model = model.split("/")[-1] if "/" in model else model
548
+ return self.model_usage_weights.get(clean_model, 1)
src/rotator_library/usage_manager.py CHANGED
@@ -55,6 +55,11 @@ class UsageManager:
55
  rotation_tolerance: float = 0.0,
56
  provider_rotation_modes: Optional[Dict[str, str]] = None,
57
  provider_plugins: Optional[Dict[str, Any]] = None,
 
 
 
 
 
58
  ):
59
  """
60
  Initialize the UsageManager.
@@ -71,11 +76,22 @@ class UsageManager:
71
  - "sequential": Use one credential until exhausted (preserves caching)
72
  provider_plugins: Dict mapping provider names to provider plugin instances.
73
  Used for per-provider usage reset configuration (window durations, field names).
 
 
 
 
 
 
 
 
74
  """
75
  self.file_path = file_path
76
  self.rotation_tolerance = rotation_tolerance
77
  self.provider_rotation_modes = provider_rotation_modes or {}
78
  self.provider_plugins = provider_plugins or PROVIDER_PLUGINS
 
 
 
79
  self._provider_instances: Dict[str, Any] = {} # Cache for provider instances
80
  self.key_states: Dict[str, Dict[str, Any]] = {}
81
 
@@ -107,6 +123,48 @@ class UsageManager:
107
  """
108
  return self.provider_rotation_modes.get(provider, "balanced")
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def _get_provider_from_credential(self, credential: str) -> Optional[str]:
111
  """
112
  Extract provider name from credential path or identifier.
@@ -238,6 +296,60 @@ class UsageManager:
238
 
239
  return []
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  def _get_usage_field_name(self, credential: str) -> str:
242
  """
243
  Get the usage tracking field name for a credential.
@@ -360,59 +472,64 @@ class UsageManager:
360
 
361
  return data
362
 
363
- def _select_sequential(
364
  self,
365
  candidates: List[Tuple[str, int]],
366
  credential_priorities: Optional[Dict[str, int]] = None,
367
- ) -> str:
368
  """
369
- Select credential in strict sequential order for cache-preserving rotation.
370
 
371
- This method ensures the same credential is reused until it hits a cooldown,
372
- which preserves provider-side caching (e.g., thinking signature caches).
373
 
374
- Selection logic:
375
- 1. Sort by priority (lowest number = highest priority)
376
- 2. Within same priority, sort by last_used_ts (most recent first = sticky)
377
- 3. Return the first candidate
 
378
 
379
  Args:
380
  candidates: List of (credential_id, usage_count) tuples
381
  credential_priorities: Optional dict mapping credentials to priority levels
382
 
383
  Returns:
384
- Selected credential ID
385
  """
386
  if not candidates:
387
- raise ValueError("Cannot select from empty candidate list")
388
 
389
  if len(candidates) == 1:
390
- return candidates[0][0]
391
 
392
- def sort_key(item: Tuple[str, int]) -> Tuple[int, float]:
393
- cred, _ = item
394
- # Priority: lower is better (1 = highest priority)
395
  priority = (
396
  credential_priorities.get(cred, 999) if credential_priorities else 999
397
  )
398
- # Last used: higher (more recent) is better for stickiness
399
  last_used = (
400
  self._usage_data.get(cred, {}).get("last_used_ts", 0)
401
  if self._usage_data
402
  else 0
403
  )
404
- # Negative last_used so most recent sorts first
405
- return (priority, -last_used)
 
 
 
 
406
 
407
  sorted_candidates = sorted(candidates, key=sort_key)
408
- selected = sorted_candidates[0][0]
409
 
410
- lib_logger.debug(
411
- f"Sequential selection: chose {mask_credential(selected)} "
412
- f"(priority={credential_priorities.get(selected, 999) if credential_priorities else 'N/A'})"
413
- )
 
 
 
414
 
415
- return selected
416
 
417
  async def _lazy_init(self):
418
  """Initializes the usage data by loading it from the file asynchronously."""
@@ -966,7 +1083,8 @@ class UsageManager:
966
  priority = credential_priorities.get(key, 999)
967
 
968
  # Get usage count for load balancing within priority groups
969
- usage_count = self._get_usage_count(key, model)
 
970
 
971
  # Group by priority
972
  if priority not in priority_groups:
@@ -979,6 +1097,16 @@ class UsageManager:
979
  for priority_level in sorted_priorities:
980
  keys_in_priority = priority_groups[priority_level]
981
 
 
 
 
 
 
 
 
 
 
 
982
  # Within each priority group, use existing tier1/tier2 logic
983
  tier1_keys, tier2_keys = [], []
984
  for key, usage_count in keys_in_priority:
@@ -988,30 +1116,24 @@ class UsageManager:
988
  if not key_state["models_in_use"]:
989
  tier1_keys.append((key, usage_count))
990
  # Tier 2: Keys that can accept more concurrent requests
991
- elif key_state["models_in_use"].get(model, 0) < max_concurrent:
 
 
 
992
  tier2_keys.append((key, usage_count))
993
 
994
- # Determine selection method based on provider's rotation mode
995
- provider = model.split("/")[0] if "/" in model else ""
996
- rotation_mode = self._get_rotation_mode(provider)
997
-
998
  if rotation_mode == "sequential":
999
- # Sequential mode: stick with same credential until exhausted
 
1000
  selection_method = "sequential"
1001
  if tier1_keys:
1002
- selected_key = self._select_sequential(
1003
  tier1_keys, credential_priorities
1004
  )
1005
- tier1_keys = [
1006
- (k, u) for k, u in tier1_keys if k == selected_key
1007
- ]
1008
  if tier2_keys:
1009
- selected_key = self._select_sequential(
1010
  tier2_keys, credential_priorities
1011
  )
1012
- tier2_keys = [
1013
- (k, u) for k, u in tier2_keys if k == selected_key
1014
- ]
1015
  elif self.rotation_tolerance > 0:
1016
  # Balanced mode with weighted randomness
1017
  selection_method = "weighted-random"
@@ -1057,7 +1179,7 @@ class UsageManager:
1057
  state = self.key_states[key]
1058
  async with state["lock"]:
1059
  current_count = state["models_in_use"].get(model, 0)
1060
- if current_count < max_concurrent:
1061
  state["models_in_use"][model] = current_count + 1
1062
  tier_name = (
1063
  credential_tier_names.get(key, "unknown")
@@ -1066,7 +1188,7 @@ class UsageManager:
1066
  )
1067
  lib_logger.info(
1068
  f"Acquired key {mask_credential(key)} for model {model} "
1069
- f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
1070
  )
1071
  return key
1072
 
@@ -1095,6 +1217,19 @@ class UsageManager:
1095
 
1096
  else:
1097
  # Original logic when no priorities specified
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
  tier1_keys, tier2_keys = [], []
1099
 
1100
  # First, filter the list of available keys to exclude any on cooldown.
@@ -1108,37 +1243,32 @@ class UsageManager:
1108
  continue
1109
 
1110
  # Prioritize keys based on their current usage to ensure load balancing.
1111
- usage_count = self._get_usage_count(key, model)
 
1112
  key_state = self.key_states[key]
1113
 
1114
  # Tier 1: Completely idle keys (preferred).
1115
  if not key_state["models_in_use"]:
1116
  tier1_keys.append((key, usage_count))
1117
  # Tier 2: Keys that can accept more concurrent requests for this model.
1118
- elif key_state["models_in_use"].get(model, 0) < max_concurrent:
 
 
 
1119
  tier2_keys.append((key, usage_count))
1120
 
1121
- # Determine selection method based on provider's rotation mode
1122
- provider = model.split("/")[0] if "/" in model else ""
1123
- rotation_mode = self._get_rotation_mode(provider)
1124
-
1125
  if rotation_mode == "sequential":
1126
- # Sequential mode: stick with same credential until exhausted
 
1127
  selection_method = "sequential"
1128
  if tier1_keys:
1129
- selected_key = self._select_sequential(
1130
  tier1_keys, credential_priorities
1131
  )
1132
- tier1_keys = [
1133
- (k, u) for k, u in tier1_keys if k == selected_key
1134
- ]
1135
  if tier2_keys:
1136
- selected_key = self._select_sequential(
1137
  tier2_keys, credential_priorities
1138
  )
1139
- tier2_keys = [
1140
- (k, u) for k, u in tier2_keys if k == selected_key
1141
- ]
1142
  elif self.rotation_tolerance > 0:
1143
  # Balanced mode with weighted randomness
1144
  selection_method = "weighted-random"
@@ -1185,7 +1315,7 @@ class UsageManager:
1185
  state = self.key_states[key]
1186
  async with state["lock"]:
1187
  current_count = state["models_in_use"].get(model, 0)
1188
- if current_count < max_concurrent:
1189
  state["models_in_use"][model] = current_count + 1
1190
  tier_name = (
1191
  credential_tier_names.get(key)
@@ -1195,7 +1325,7 @@ class UsageManager:
1195
  tier_info = f"tier: {tier_name}, " if tier_name else ""
1196
  lib_logger.info(
1197
  f"Acquired key {mask_credential(key)} for model {model} "
1198
- f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
1199
  )
1200
  return key
1201
 
 
55
  rotation_tolerance: float = 0.0,
56
  provider_rotation_modes: Optional[Dict[str, str]] = None,
57
  provider_plugins: Optional[Dict[str, Any]] = None,
58
+ priority_multipliers: Optional[Dict[str, Dict[int, int]]] = None,
59
+ priority_multipliers_by_mode: Optional[
60
+ Dict[str, Dict[str, Dict[int, int]]]
61
+ ] = None,
62
+ sequential_fallback_multipliers: Optional[Dict[str, int]] = None,
63
  ):
64
  """
65
  Initialize the UsageManager.
 
76
  - "sequential": Use one credential until exhausted (preserves caching)
77
  provider_plugins: Dict mapping provider names to provider plugin instances.
78
  Used for per-provider usage reset configuration (window durations, field names).
79
+ priority_multipliers: Dict mapping provider -> priority -> multiplier.
80
+ Universal multipliers that apply regardless of rotation mode.
81
+ Example: {"antigravity": {1: 5, 2: 3}}
82
+ priority_multipliers_by_mode: Dict mapping provider -> mode -> priority -> multiplier.
83
+ Mode-specific overrides. Example: {"antigravity": {"balanced": {3: 1}}}
84
+ sequential_fallback_multipliers: Dict mapping provider -> fallback multiplier.
85
+ Used in sequential mode when priority not in priority_multipliers.
86
+ Example: {"antigravity": 2}
87
  """
88
  self.file_path = file_path
89
  self.rotation_tolerance = rotation_tolerance
90
  self.provider_rotation_modes = provider_rotation_modes or {}
91
  self.provider_plugins = provider_plugins or PROVIDER_PLUGINS
92
+ self.priority_multipliers = priority_multipliers or {}
93
+ self.priority_multipliers_by_mode = priority_multipliers_by_mode or {}
94
+ self.sequential_fallback_multipliers = sequential_fallback_multipliers or {}
95
  self._provider_instances: Dict[str, Any] = {} # Cache for provider instances
96
  self.key_states: Dict[str, Dict[str, Any]] = {}
97
 
 
123
  """
124
  return self.provider_rotation_modes.get(provider, "balanced")
125
 
126
+ def _get_priority_multiplier(
127
+ self, provider: str, priority: int, rotation_mode: str
128
+ ) -> int:
129
+ """
130
+ Get the concurrency multiplier for a provider/priority/mode combination.
131
+
132
+ Lookup order:
133
+ 1. Mode-specific tier override: priority_multipliers_by_mode[provider][mode][priority]
134
+ 2. Universal tier multiplier: priority_multipliers[provider][priority]
135
+ 3. Sequential fallback (if mode is sequential): sequential_fallback_multipliers[provider]
136
+ 4. Global default: 1 (no multiplier effect)
137
+
138
+ Args:
139
+ provider: Provider name (e.g., "antigravity")
140
+ priority: Priority level (1 = highest priority)
141
+ rotation_mode: Current rotation mode ("sequential" or "balanced")
142
+
143
+ Returns:
144
+ Multiplier value
145
+ """
146
+ provider_lower = provider.lower()
147
+
148
+ # 1. Check mode-specific override
149
+ if provider_lower in self.priority_multipliers_by_mode:
150
+ mode_multipliers = self.priority_multipliers_by_mode[provider_lower]
151
+ if rotation_mode in mode_multipliers:
152
+ if priority in mode_multipliers[rotation_mode]:
153
+ return mode_multipliers[rotation_mode][priority]
154
+
155
+ # 2. Check universal tier multiplier
156
+ if provider_lower in self.priority_multipliers:
157
+ if priority in self.priority_multipliers[provider_lower]:
158
+ return self.priority_multipliers[provider_lower][priority]
159
+
160
+ # 3. Sequential fallback (only for sequential mode)
161
+ if rotation_mode == "sequential":
162
+ if provider_lower in self.sequential_fallback_multipliers:
163
+ return self.sequential_fallback_multipliers[provider_lower]
164
+
165
+ # 4. Global default
166
+ return 1
167
+
168
  def _get_provider_from_credential(self, credential: str) -> Optional[str]:
169
  """
170
  Extract provider name from credential path or identifier.
 
296
 
297
  return []
298
 
299
+ def _get_model_usage_weight(self, credential: str, model: str) -> int:
300
+ """
301
+ Get the usage weight for a model when calculating grouped usage.
302
+
303
+ Args:
304
+ credential: The credential identifier
305
+ model: Model name (with or without provider prefix)
306
+
307
+ Returns:
308
+ Weight multiplier (default 1 if not configured)
309
+ """
310
+ provider = self._get_provider_from_credential(credential)
311
+ plugin_instance = self._get_provider_instance(provider)
312
+
313
+ if plugin_instance and hasattr(plugin_instance, "get_model_usage_weight"):
314
+ return plugin_instance.get_model_usage_weight(model)
315
+
316
+ return 1
317
+
318
+ def _get_grouped_usage_count(self, key: str, model: str) -> int:
319
+ """
320
+ Get usage count for credential selection, considering quota groups.
321
+
322
+ If the model belongs to a quota group, returns the weighted combined usage
323
+ across all models in the group. Otherwise returns individual model usage.
324
+
325
+ Weights are applied per-model to account for models that consume more quota
326
+ per request (e.g., Opus might count 2x compared to Sonnet).
327
+
328
+ Args:
329
+ key: Credential identifier
330
+ model: Model name (with provider prefix, e.g., "antigravity/claude-sonnet-4-5")
331
+
332
+ Returns:
333
+ Weighted combined usage if grouped, otherwise individual model usage
334
+ """
335
+ # Check if model is in a quota group
336
+ group = self._get_model_quota_group(key, model)
337
+
338
+ if group:
339
+ # Get all models in the group
340
+ grouped_models = self._get_grouped_models(key, group)
341
+
342
+ # Sum weighted usage across all models in the group
343
+ total_weighted_usage = 0
344
+ for grouped_model in grouped_models:
345
+ usage = self._get_usage_count(key, grouped_model)
346
+ weight = self._get_model_usage_weight(key, grouped_model)
347
+ total_weighted_usage += usage * weight
348
+ return total_weighted_usage
349
+
350
+ # Not grouped - return individual model usage (no weight applied)
351
+ return self._get_usage_count(key, model)
352
+
353
  def _get_usage_field_name(self, credential: str) -> str:
354
  """
355
  Get the usage tracking field name for a credential.
 
472
 
473
  return data
474
 
475
+ def _sort_sequential(
476
  self,
477
  candidates: List[Tuple[str, int]],
478
  credential_priorities: Optional[Dict[str, int]] = None,
479
+ ) -> List[Tuple[str, int]]:
480
  """
481
+ Sort credentials for sequential mode with position retention.
482
 
483
+ Credentials maintain their position based on established usage patterns,
484
+ ensuring that actively-used credentials remain primary until exhausted.
485
 
486
+ Sorting order (within each sort key, lower value = higher priority):
487
+ 1. Priority tier (lower number = higher priority)
488
+ 2. Usage count (higher = more established in rotation, maintains position)
489
+ 3. Last used timestamp (higher = more recent, tiebreaker for stickiness)
490
+ 4. Credential ID (alphabetical, stable ordering)
491
 
492
  Args:
493
  candidates: List of (credential_id, usage_count) tuples
494
  credential_priorities: Optional dict mapping credentials to priority levels
495
 
496
  Returns:
497
+ Sorted list of candidates (same format as input)
498
  """
499
  if not candidates:
500
+ return []
501
 
502
  if len(candidates) == 1:
503
+ return candidates
504
 
505
+ def sort_key(item: Tuple[str, int]) -> Tuple[int, int, float, str]:
506
+ cred, usage_count = item
 
507
  priority = (
508
  credential_priorities.get(cred, 999) if credential_priorities else 999
509
  )
 
510
  last_used = (
511
  self._usage_data.get(cred, {}).get("last_used_ts", 0)
512
  if self._usage_data
513
  else 0
514
  )
515
+ return (
516
+ priority, # ASC: lower priority number = higher priority
517
+ -usage_count, # DESC: higher usage = more established
518
+ -last_used, # DESC: more recent = preferred for ties
519
+ cred, # ASC: stable alphabetical ordering
520
+ )
521
 
522
  sorted_candidates = sorted(candidates, key=sort_key)
 
523
 
524
+ # Debug logging - show top 3 credentials in ordering
525
+ if lib_logger.isEnabledFor(logging.DEBUG):
526
+ order_info = [
527
+ f"{mask_credential(c)}(p={credential_priorities.get(c, 999) if credential_priorities else 'N/A'}, u={u})"
528
+ for c, u in sorted_candidates[:3]
529
+ ]
530
+ lib_logger.debug(f"Sequential ordering: {' → '.join(order_info)}")
531
 
532
+ return sorted_candidates
533
 
534
  async def _lazy_init(self):
535
  """Initializes the usage data by loading it from the file asynchronously."""
 
1083
  priority = credential_priorities.get(key, 999)
1084
 
1085
  # Get usage count for load balancing within priority groups
1086
+ # Uses grouped usage if model is in a quota group
1087
+ usage_count = self._get_grouped_usage_count(key, model)
1088
 
1089
  # Group by priority
1090
  if priority not in priority_groups:
 
1097
  for priority_level in sorted_priorities:
1098
  keys_in_priority = priority_groups[priority_level]
1099
 
1100
+ # Determine selection method based on provider's rotation mode
1101
+ provider = model.split("/")[0] if "/" in model else ""
1102
+ rotation_mode = self._get_rotation_mode(provider)
1103
+
1104
+ # Calculate effective concurrency based on priority tier
1105
+ multiplier = self._get_priority_multiplier(
1106
+ provider, priority_level, rotation_mode
1107
+ )
1108
+ effective_max_concurrent = max_concurrent * multiplier
1109
+
1110
  # Within each priority group, use existing tier1/tier2 logic
1111
  tier1_keys, tier2_keys = [], []
1112
  for key, usage_count in keys_in_priority:
 
1116
  if not key_state["models_in_use"]:
1117
  tier1_keys.append((key, usage_count))
1118
  # Tier 2: Keys that can accept more concurrent requests
1119
+ elif (
1120
+ key_state["models_in_use"].get(model, 0)
1121
+ < effective_max_concurrent
1122
+ ):
1123
  tier2_keys.append((key, usage_count))
1124
 
 
 
 
 
1125
  if rotation_mode == "sequential":
1126
+ # Sequential mode: sort credentials by priority, usage, recency
1127
+ # Keep all candidates in sorted order (no filtering to single key)
1128
  selection_method = "sequential"
1129
  if tier1_keys:
1130
+ tier1_keys = self._sort_sequential(
1131
  tier1_keys, credential_priorities
1132
  )
 
 
 
1133
  if tier2_keys:
1134
+ tier2_keys = self._sort_sequential(
1135
  tier2_keys, credential_priorities
1136
  )
 
 
 
1137
  elif self.rotation_tolerance > 0:
1138
  # Balanced mode with weighted randomness
1139
  selection_method = "weighted-random"
 
1179
  state = self.key_states[key]
1180
  async with state["lock"]:
1181
  current_count = state["models_in_use"].get(model, 0)
1182
+ if current_count < effective_max_concurrent:
1183
  state["models_in_use"][model] = current_count + 1
1184
  tier_name = (
1185
  credential_tier_names.get(key, "unknown")
 
1188
  )
1189
  lib_logger.info(
1190
  f"Acquired key {mask_credential(key)} for model {model} "
1191
+ f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
1192
  )
1193
  return key
1194
 
 
1217
 
1218
  else:
1219
  # Original logic when no priorities specified
1220
+
1221
+ # Determine selection method based on provider's rotation mode
1222
+ provider = model.split("/")[0] if "/" in model else ""
1223
+ rotation_mode = self._get_rotation_mode(provider)
1224
+
1225
+ # Calculate effective concurrency for default priority (999)
1226
+ # When no priorities are specified, all credentials get default priority
1227
+ default_priority = 999
1228
+ multiplier = self._get_priority_multiplier(
1229
+ provider, default_priority, rotation_mode
1230
+ )
1231
+ effective_max_concurrent = max_concurrent * multiplier
1232
+
1233
  tier1_keys, tier2_keys = [], []
1234
 
1235
  # First, filter the list of available keys to exclude any on cooldown.
 
1243
  continue
1244
 
1245
  # Prioritize keys based on their current usage to ensure load balancing.
1246
+ # Uses grouped usage if model is in a quota group
1247
+ usage_count = self._get_grouped_usage_count(key, model)
1248
  key_state = self.key_states[key]
1249
 
1250
  # Tier 1: Completely idle keys (preferred).
1251
  if not key_state["models_in_use"]:
1252
  tier1_keys.append((key, usage_count))
1253
  # Tier 2: Keys that can accept more concurrent requests for this model.
1254
+ elif (
1255
+ key_state["models_in_use"].get(model, 0)
1256
+ < effective_max_concurrent
1257
+ ):
1258
  tier2_keys.append((key, usage_count))
1259
 
 
 
 
 
1260
  if rotation_mode == "sequential":
1261
+ # Sequential mode: sort credentials by priority, usage, recency
1262
+ # Keep all candidates in sorted order (no filtering to single key)
1263
  selection_method = "sequential"
1264
  if tier1_keys:
1265
+ tier1_keys = self._sort_sequential(
1266
  tier1_keys, credential_priorities
1267
  )
 
 
 
1268
  if tier2_keys:
1269
+ tier2_keys = self._sort_sequential(
1270
  tier2_keys, credential_priorities
1271
  )
 
 
 
1272
  elif self.rotation_tolerance > 0:
1273
  # Balanced mode with weighted randomness
1274
  selection_method = "weighted-random"
 
1315
  state = self.key_states[key]
1316
  async with state["lock"]:
1317
  current_count = state["models_in_use"].get(model, 0)
1318
+ if current_count < effective_max_concurrent:
1319
  state["models_in_use"][model] = current_count + 1
1320
  tier_name = (
1321
  credential_tier_names.get(key)
 
1325
  tier_info = f"tier: {tier_name}, " if tier_name else ""
1326
  lib_logger.info(
1327
  f"Acquired key {mask_credential(key)} for model {model} "
1328
+ f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
1329
  )
1330
  return key
1331