Fix the logic agent and add API key rotation
Browse files- .env.example +5 -5
- app/agent/mmca_agent.py +116 -12
- app/api/router.py +10 -20
- app/shared/integrations/key_rotator.py +142 -0
- app/shared/integrations/megallm_client.py +22 -9
- tests/test_key_rotation.py +149 -0
.env.example
CHANGED
|
@@ -20,8 +20,11 @@ GOOGLE_API_KEY=your_google_api_key
|
|
| 20 |
GOOGLE_CLIENT_ID=your_google_api_key
|
| 21 |
JWT_SECRET=your-super-secret-jwt-key-change-in-production
|
| 22 |
|
| 23 |
-
# MegaLLM (
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
| 25 |
MEGALLM_BASE_URL=https://ai.megallm.io/v1
|
| 26 |
|
| 27 |
# Brave Social Search
|
|
@@ -29,6 +32,3 @@ BRAVE_API_KEY=your_brave_api_key
|
|
| 29 |
|
| 30 |
# Google OAuth
|
| 31 |
GOOGLE_CLIENT_ID=your_google_client_id
|
| 32 |
-
|
| 33 |
-
# CLIP (optional - for image embeddings)
|
| 34 |
-
HUGGINGFACE_API_KEY=your_hf_api_key
|
|
|
|
| 20 |
GOOGLE_CLIENT_ID=your_google_api_key
|
| 21 |
JWT_SECRET=your-super-secret-jwt-key-change-in-production
|
| 22 |
|
| 23 |
+
# MegaLLM (API Key Rotation - add as many as needed)
|
| 24 |
+
# Keys are rotated round-robin to avoid 15 req/min limit per key
|
| 25 |
+
MEGALLM_API_KEY_1=your_first_megallm_api_key
|
| 26 |
+
MEGALLM_API_KEY_2=your_second_megallm_api_key
|
| 27 |
+
MEGALLM_API_KEY_3=your_third_megallm_api_key
|
| 28 |
MEGALLM_BASE_URL=https://ai.megallm.io/v1
|
| 29 |
|
| 30 |
# Brave Social Search
|
|
|
|
| 32 |
|
| 33 |
# Google OAuth
|
| 34 |
GOOGLE_CLIENT_ID=your_google_client_id
|
|
|
|
|
|
|
|
|
app/agent/mmca_agent.py
CHANGED
|
@@ -10,6 +10,7 @@ Supports multiple LLM providers: Google (Gemini) and MegaLLM (DeepSeek).
|
|
| 10 |
"""
|
| 11 |
|
| 12 |
import json
|
|
|
|
| 13 |
import time
|
| 14 |
from dataclasses import dataclass, field
|
| 15 |
from typing import Any
|
|
@@ -83,6 +84,7 @@ class ChatResult:
|
|
| 83 |
tools_used: list[str] = field(default_factory=list)
|
| 84 |
total_duration_ms: float = 0
|
| 85 |
tool_results: list = field(default_factory=list) # List of ToolCall with results
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
class MMCAAgent:
|
|
@@ -209,7 +211,7 @@ class MMCAAgent:
|
|
| 209 |
agent_logger.workflow_step("Step 3: Synthesize Response")
|
| 210 |
|
| 211 |
llm_start = time.time()
|
| 212 |
-
response = await self._synthesize_response(message, tool_results, image_url, history)
|
| 213 |
llm_duration = (time.time() - llm_start) * 1000
|
| 214 |
|
| 215 |
agent_logger.llm_response(self.provider, response[:100], tokens=None)
|
|
@@ -228,14 +230,15 @@ class MMCAAgent:
|
|
| 228 |
workflow.total_duration_ms = total_duration
|
| 229 |
|
| 230 |
# Log complete
|
| 231 |
-
agent_logger.api_response("/chat", 200, {"response_len": len(response)}, total_duration)
|
| 232 |
|
| 233 |
return ChatResult(
|
| 234 |
response=response,
|
| 235 |
workflow=workflow,
|
| 236 |
tools_used=workflow.tools_used,
|
| 237 |
total_duration_ms=total_duration,
|
| 238 |
-
tool_results=tool_results,
|
|
|
|
| 239 |
)
|
| 240 |
|
| 241 |
def _detect_intent(self, message: str, image_url: str | None) -> str:
|
|
@@ -269,6 +272,35 @@ class MMCAAgent:
|
|
| 269 |
}
|
| 270 |
return purposes.get(tool_name, tool_name)
|
| 271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
async def _plan_tool_calls(
|
| 273 |
self,
|
| 274 |
message: str,
|
|
@@ -278,7 +310,13 @@ class MMCAAgent:
|
|
| 278 |
Analyze message and plan which tools to call.
|
| 279 |
|
| 280 |
Returns list of ToolCall objects with tool_name and arguments.
|
|
|
|
| 281 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
tool_calls = []
|
| 283 |
|
| 284 |
# If image is provided, always use visual search
|
|
@@ -343,7 +381,6 @@ class MMCAAgent:
|
|
| 343 |
arguments={"query": message, "limit": 5},
|
| 344 |
))
|
| 345 |
|
| 346 |
-
|
| 347 |
return tool_calls
|
| 348 |
|
| 349 |
async def _execute_tool(
|
|
@@ -442,8 +479,39 @@ class MMCAAgent:
|
|
| 442 |
tool_results: list[ToolCall],
|
| 443 |
image_url: str | None = None,
|
| 444 |
history: str | None = None,
|
| 445 |
-
) -> str:
|
| 446 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
# Build context from tool results
|
| 448 |
context_parts = []
|
| 449 |
for tool_call in tool_results:
|
|
@@ -452,7 +520,7 @@ class MMCAAgent:
|
|
| 452 |
f"Kết quả từ {tool_call.tool_name}:\n{json.dumps(tool_call.result, ensure_ascii=False, indent=2)}"
|
| 453 |
)
|
| 454 |
|
| 455 |
-
context = "\n\n".join(context_parts)
|
| 456 |
|
| 457 |
# Build history section if available
|
| 458 |
history_section = ""
|
|
@@ -463,25 +531,61 @@ class MMCAAgent:
|
|
| 463 |
---
|
| 464 |
"""
|
| 465 |
|
| 466 |
-
# Generate response using LLM
|
| 467 |
-
prompt = f"""{history_section}Dựa trên kết quả tìm kiếm sau, hãy trả lời câu hỏi của người dùng
|
| 468 |
|
| 469 |
Câu hỏi hiện tại: {message}
|
| 470 |
|
| 471 |
{context}
|
| 472 |
|
| 473 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
Nếu có lịch sử hội thoại, hãy cân nhắc ngữ cảnh trước đó khi trả lời."""
|
| 475 |
|
| 476 |
agent_logger.llm_call(self.provider, self.model or "default", prompt[:100])
|
| 477 |
|
| 478 |
-
|
| 479 |
prompt=prompt,
|
| 480 |
temperature=0.7,
|
| 481 |
system_instruction=SYSTEM_PROMPT,
|
| 482 |
)
|
| 483 |
|
| 484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
|
| 486 |
def _extract_location(self, message: str) -> str | None:
|
| 487 |
"""Extract location name from message using pattern matching."""
|
|
|
|
| 10 |
"""
|
| 11 |
|
| 12 |
import json
|
| 13 |
+
import re
|
| 14 |
import time
|
| 15 |
from dataclasses import dataclass, field
|
| 16 |
from typing import Any
|
|
|
|
| 84 |
tools_used: list[str] = field(default_factory=list)
|
| 85 |
total_duration_ms: float = 0
|
| 86 |
tool_results: list = field(default_factory=list) # List of ToolCall with results
|
| 87 |
+
selected_place_ids: list[str] = field(default_factory=list) # LLM-selected place IDs
|
| 88 |
|
| 89 |
|
| 90 |
class MMCAAgent:
|
|
|
|
| 211 |
agent_logger.workflow_step("Step 3: Synthesize Response")
|
| 212 |
|
| 213 |
llm_start = time.time()
|
| 214 |
+
response, selected_place_ids = await self._synthesize_response(message, tool_results, image_url, history)
|
| 215 |
llm_duration = (time.time() - llm_start) * 1000
|
| 216 |
|
| 217 |
agent_logger.llm_response(self.provider, response[:100], tokens=None)
|
|
|
|
| 230 |
workflow.total_duration_ms = total_duration
|
| 231 |
|
| 232 |
# Log complete
|
| 233 |
+
agent_logger.api_response("/chat", 200, {"response_len": len(response), "places": len(selected_place_ids)}, total_duration)
|
| 234 |
|
| 235 |
return ChatResult(
|
| 236 |
response=response,
|
| 237 |
workflow=workflow,
|
| 238 |
tools_used=workflow.tools_used,
|
| 239 |
total_duration_ms=total_duration,
|
| 240 |
+
tool_results=tool_results,
|
| 241 |
+
selected_place_ids=selected_place_ids,
|
| 242 |
)
|
| 243 |
|
| 244 |
def _detect_intent(self, message: str, image_url: str | None) -> str:
|
|
|
|
| 272 |
}
|
| 273 |
return purposes.get(tool_name, tool_name)
|
| 274 |
|
| 275 |
+
def _is_greeting_or_simple_query(self, message: str) -> bool:
|
| 276 |
+
"""
|
| 277 |
+
Check if message is a simple greeting/small-talk that doesn't need tools.
|
| 278 |
+
|
| 279 |
+
Returns True for greetings, thanks, simple acknowledgments.
|
| 280 |
+
"""
|
| 281 |
+
simple_patterns = [
|
| 282 |
+
# English
|
| 283 |
+
"hello", "hi", "hey", "yo", "sup",
|
| 284 |
+
"thank", "thanks", "bye", "goodbye",
|
| 285 |
+
"ok", "okay", "yes", "no", "good", "great", "nice",
|
| 286 |
+
# Vietnamese
|
| 287 |
+
"xin chào", "chào", "chào bạn", "ê", "alo",
|
| 288 |
+
"cảm ơn", "cám ơn", "thanks", "tạm biệt", "bye",
|
| 289 |
+
"ok", "được", "tốt", "hay", "ừ", "ờ", "vâng", "dạ",
|
| 290 |
+
]
|
| 291 |
+
msg_lower = message.lower().strip()
|
| 292 |
+
|
| 293 |
+
# Very short messages are likely greetings
|
| 294 |
+
if len(msg_lower) < 15:
|
| 295 |
+
for pattern in simple_patterns:
|
| 296 |
+
if pattern in msg_lower:
|
| 297 |
+
return True
|
| 298 |
+
# Also check if message is just a single word greeting
|
| 299 |
+
if msg_lower in simple_patterns:
|
| 300 |
+
return True
|
| 301 |
+
|
| 302 |
+
return False
|
| 303 |
+
|
| 304 |
async def _plan_tool_calls(
|
| 305 |
self,
|
| 306 |
message: str,
|
|
|
|
| 310 |
Analyze message and plan which tools to call.
|
| 311 |
|
| 312 |
Returns list of ToolCall objects with tool_name and arguments.
|
| 313 |
+
Returns empty list for simple greetings (no tools needed).
|
| 314 |
"""
|
| 315 |
+
# Early exit for greetings - no tools needed
|
| 316 |
+
if self._is_greeting_or_simple_query(message) and not image_url:
|
| 317 |
+
agent_logger.workflow_step("Greeting detected", "Skipping tools")
|
| 318 |
+
return []
|
| 319 |
+
|
| 320 |
tool_calls = []
|
| 321 |
|
| 322 |
# If image is provided, always use visual search
|
|
|
|
| 381 |
arguments={"query": message, "limit": 5},
|
| 382 |
))
|
| 383 |
|
|
|
|
| 384 |
return tool_calls
|
| 385 |
|
| 386 |
async def _execute_tool(
|
|
|
|
| 479 |
tool_results: list[ToolCall],
|
| 480 |
image_url: str | None = None,
|
| 481 |
history: str | None = None,
|
| 482 |
+
) -> tuple[str, list[str]]:
|
| 483 |
+
"""
|
| 484 |
+
Synthesize final response from tool results with conversation history.
|
| 485 |
+
|
| 486 |
+
Returns:
|
| 487 |
+
Tuple of (response_text, selected_place_ids)
|
| 488 |
+
"""
|
| 489 |
+
# Collect all available place_ids from tool results
|
| 490 |
+
all_place_ids = []
|
| 491 |
+
for tool_call in tool_results:
|
| 492 |
+
if tool_call.result:
|
| 493 |
+
for item in tool_call.result:
|
| 494 |
+
if isinstance(item, dict) and 'place_id' in item:
|
| 495 |
+
all_place_ids.append(item['place_id'])
|
| 496 |
+
|
| 497 |
+
# If no tool results (greeting case), return simple response
|
| 498 |
+
if not tool_results:
|
| 499 |
+
# Build history section if available
|
| 500 |
+
history_section = ""
|
| 501 |
+
if history:
|
| 502 |
+
history_section = f"Lịch sử hội thoại:\n{history}\n\n---\n"
|
| 503 |
+
|
| 504 |
+
prompt = f"""{history_section}User nói: "{message}"
|
| 505 |
+
|
| 506 |
+
Hãy trả lời thân thiện bằng tiếng Việt. Đây là lời chào hoặc tin nhắn đơn giản, không cần tìm kiếm địa điểm."""
|
| 507 |
+
|
| 508 |
+
response = await self.llm_client.generate(
|
| 509 |
+
prompt=prompt,
|
| 510 |
+
temperature=0.7,
|
| 511 |
+
system_instruction="Bạn là LocalMate - trợ lý du lịch thân thiện cho Đà Nẵng. Trả lời ngắn gọn, thân thiện.",
|
| 512 |
+
)
|
| 513 |
+
return response, []
|
| 514 |
+
|
| 515 |
# Build context from tool results
|
| 516 |
context_parts = []
|
| 517 |
for tool_call in tool_results:
|
|
|
|
| 520 |
f"Kết quả từ {tool_call.tool_name}:\n{json.dumps(tool_call.result, ensure_ascii=False, indent=2)}"
|
| 521 |
)
|
| 522 |
|
| 523 |
+
context = "\n\n".join(context_parts)
|
| 524 |
|
| 525 |
# Build history section if available
|
| 526 |
history_section = ""
|
|
|
|
| 531 |
---
|
| 532 |
"""
|
| 533 |
|
| 534 |
+
# Generate response using LLM with JSON format for place selection
|
| 535 |
+
prompt = f"""{history_section}Dựa trên kết quả tìm kiếm sau, hãy trả lời câu hỏi của người dùng.
|
| 536 |
|
| 537 |
Câu hỏi hiện tại: {message}
|
| 538 |
|
| 539 |
{context}
|
| 540 |
|
| 541 |
+
**QUAN TRỌNG:** Trả lời theo format JSON:
|
| 542 |
+
```json
|
| 543 |
+
{{
|
| 544 |
+
"response": "Câu trả lời tiếng Việt, thân thiện. Giới thiệu top 2-3 địa điểm phù hợp nhất.",
|
| 545 |
+
"selected_place_ids": ["place_id_1", "place_id_2", "place_id_3"]
|
| 546 |
+
}}
|
| 547 |
+
```
|
| 548 |
+
|
| 549 |
+
Chỉ chọn những place_id xuất hiện trong kết quả tìm kiếm ở trên. Nếu không có địa điểm phù hợp, để mảng rỗng.
|
| 550 |
Nếu có lịch sử hội thoại, hãy cân nhắc ngữ cảnh trước đó khi trả lời."""
|
| 551 |
|
| 552 |
agent_logger.llm_call(self.provider, self.model or "default", prompt[:100])
|
| 553 |
|
| 554 |
+
raw_response = await self.llm_client.generate(
|
| 555 |
prompt=prompt,
|
| 556 |
temperature=0.7,
|
| 557 |
system_instruction=SYSTEM_PROMPT,
|
| 558 |
)
|
| 559 |
|
| 560 |
+
# Parse JSON response
|
| 561 |
+
try:
|
| 562 |
+
# Extract JSON from code blocks
|
| 563 |
+
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', raw_response, re.DOTALL)
|
| 564 |
+
if json_match:
|
| 565 |
+
json_str = json_match.group(1)
|
| 566 |
+
else:
|
| 567 |
+
# Try to find raw JSON
|
| 568 |
+
json_start = raw_response.find('{')
|
| 569 |
+
json_end = raw_response.rfind('}')
|
| 570 |
+
if json_start != -1 and json_end != -1:
|
| 571 |
+
json_str = raw_response[json_start:json_end + 1]
|
| 572 |
+
else:
|
| 573 |
+
# No JSON found, return raw response
|
| 574 |
+
return raw_response, []
|
| 575 |
+
|
| 576 |
+
data = json.loads(json_str)
|
| 577 |
+
text_response = data.get("response", raw_response)
|
| 578 |
+
selected_ids = data.get("selected_place_ids", [])
|
| 579 |
+
|
| 580 |
+
# Validate selected_ids are in available places
|
| 581 |
+
valid_ids = [pid for pid in selected_ids if pid in all_place_ids]
|
| 582 |
+
|
| 583 |
+
return text_response, valid_ids
|
| 584 |
+
|
| 585 |
+
except (json.JSONDecodeError, KeyError) as e:
|
| 586 |
+
agent_logger.error("Failed to parse synthesis JSON", e)
|
| 587 |
+
# Fallback: return raw response with no places
|
| 588 |
+
return raw_response, []
|
| 589 |
|
| 590 |
def _extract_location(self, message: str) -> str | None:
|
| 591 |
"""Extract location name from message using pattern matching."""
|
app/api/router.py
CHANGED
|
@@ -405,30 +405,20 @@ async def chat(
|
|
| 405 |
session_id=session_id,
|
| 406 |
)
|
| 407 |
|
| 408 |
-
#
|
| 409 |
places = []
|
| 410 |
-
if result.
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
distance_map = {}
|
| 414 |
for tool_call in result.tool_results:
|
| 415 |
-
# ToolCall has .result attribute which is a list of dicts
|
| 416 |
if tool_call.result:
|
| 417 |
for item in tool_call.result:
|
| 418 |
-
if isinstance(item, dict) and 'place_id' in item:
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
if 'distance_km' in item:
|
| 424 |
-
distance_map[pid] = item['distance_km']
|
| 425 |
-
|
| 426 |
-
if place_ids:
|
| 427 |
-
places = await enrich_places_from_ids(place_ids[:5], db) # Limit to top 5
|
| 428 |
-
# Add distance info to places
|
| 429 |
-
for place in places:
|
| 430 |
-
if place.place_id in distance_map:
|
| 431 |
-
place.distance_km = distance_map[place.place_id]
|
| 432 |
|
| 433 |
return ChatResponse(
|
| 434 |
response=result.response,
|
|
|
|
| 405 |
session_id=session_id,
|
| 406 |
)
|
| 407 |
|
| 408 |
+
# Use LLM-selected places (same pattern as ReAct mode)
|
| 409 |
places = []
|
| 410 |
+
if result.selected_place_ids:
|
| 411 |
+
places = await enrich_places_from_ids(result.selected_place_ids, db)
|
| 412 |
+
# Add distance info if available from tool results
|
| 413 |
+
distance_map = {}
|
| 414 |
for tool_call in result.tool_results:
|
|
|
|
| 415 |
if tool_call.result:
|
| 416 |
for item in tool_call.result:
|
| 417 |
+
if isinstance(item, dict) and 'place_id' in item and 'distance_km' in item:
|
| 418 |
+
distance_map[item['place_id']] = item['distance_km']
|
| 419 |
+
for place in places:
|
| 420 |
+
if place.place_id in distance_map:
|
| 421 |
+
place.distance_km = distance_map[place.place_id]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
return ChatResponse(
|
| 424 |
response=result.response,
|
app/shared/integrations/key_rotator.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Thread-safe API Key Rotator for load balancing across multiple keys.
|
| 2 |
+
|
| 3 |
+
This module provides a round-robin key rotation mechanism to distribute
|
| 4 |
+
API requests across multiple keys, helping to avoid rate limits.
|
| 5 |
+
|
| 6 |
+
Usage:
|
| 7 |
+
from app.shared.integrations.key_rotator import megallm_key_rotator
|
| 8 |
+
|
| 9 |
+
api_key = megallm_key_rotator.get_next_key()
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import logging
|
| 13 |
+
import os
|
| 14 |
+
import threading
|
| 15 |
+
from typing import List
|
| 16 |
+
|
| 17 |
+
from dotenv import load_dotenv
|
| 18 |
+
|
| 19 |
+
# Ensure .env is loaded before accessing os.environ
|
| 20 |
+
load_dotenv()
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class KeyRotator:
|
| 26 |
+
"""Thread-safe round-robin API key rotator.
|
| 27 |
+
|
| 28 |
+
Distributes API calls across multiple keys to avoid per-key rate limits.
|
| 29 |
+
Each call to get_next_key() returns the next key in rotation.
|
| 30 |
+
|
| 31 |
+
Attributes:
|
| 32 |
+
_keys: List of API keys to rotate through
|
| 33 |
+
_index: Current position in rotation
|
| 34 |
+
_lock: Thread lock for safe concurrent access
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
def __init__(self, keys: List[str], name: str = "default"):
|
| 38 |
+
"""Initialize the key rotator.
|
| 39 |
+
|
| 40 |
+
Args:
|
| 41 |
+
keys: List of API keys (must have at least one)
|
| 42 |
+
name: Name for logging identification
|
| 43 |
+
|
| 44 |
+
Raises:
|
| 45 |
+
ValueError: If keys list is empty
|
| 46 |
+
"""
|
| 47 |
+
if not keys:
|
| 48 |
+
raise ValueError("At least one API key is required")
|
| 49 |
+
|
| 50 |
+
self._keys = keys
|
| 51 |
+
self._name = name
|
| 52 |
+
self._index = 0
|
| 53 |
+
self._lock = threading.Lock()
|
| 54 |
+
self._request_count = 0
|
| 55 |
+
|
| 56 |
+
logger.info(f"[KeyRotator:{name}] Initialized with {len(keys)} API keys")
|
| 57 |
+
|
| 58 |
+
def get_next_key(self) -> str:
|
| 59 |
+
"""Get next API key in rotation (thread-safe).
|
| 60 |
+
|
| 61 |
+
Returns:
|
| 62 |
+
The next API key in round-robin order
|
| 63 |
+
"""
|
| 64 |
+
with self._lock:
|
| 65 |
+
key = self._keys[self._index]
|
| 66 |
+
key_index = self._index + 1 # 1-based for logging
|
| 67 |
+
self._index = (self._index + 1) % len(self._keys)
|
| 68 |
+
self._request_count += 1
|
| 69 |
+
|
| 70 |
+
# Log rotation (mask key for security, only show last 8 chars)
|
| 71 |
+
masked_key = f"...{key[-8:]}" if len(key) > 8 else key
|
| 72 |
+
logger.info(
|
| 73 |
+
f"[KeyRotator:{self._name}] Request #{self._request_count} "
|
| 74 |
+
f"using key {key_index}/{len(self._keys)} ({masked_key})"
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
return key
|
| 78 |
+
|
| 79 |
+
@property
|
| 80 |
+
def total_keys(self) -> int:
|
| 81 |
+
"""Number of keys in rotation."""
|
| 82 |
+
return len(self._keys)
|
| 83 |
+
|
| 84 |
+
@property
|
| 85 |
+
def request_count(self) -> int:
|
| 86 |
+
"""Total number of requests made through this rotator."""
|
| 87 |
+
return self._request_count
|
| 88 |
+
|
| 89 |
+
def get_stats(self) -> dict:
|
| 90 |
+
"""Get rotation statistics for debugging."""
|
| 91 |
+
return {
|
| 92 |
+
"name": self._name,
|
| 93 |
+
"total_keys": len(self._keys),
|
| 94 |
+
"current_index": self._index,
|
| 95 |
+
"total_requests": self._request_count,
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def load_megallm_keys() -> List[str]:
|
| 100 |
+
"""Load all MEGALLM_API_KEY_* from environment variables.
|
| 101 |
+
|
| 102 |
+
Looks for keys in format: MEGALLM_API_KEY_1, MEGALLM_API_KEY_2, etc.
|
| 103 |
+
Falls back to single MEGALLM_API_KEY for backward compatibility.
|
| 104 |
+
|
| 105 |
+
Returns:
|
| 106 |
+
List of API keys found in environment
|
| 107 |
+
"""
|
| 108 |
+
keys = []
|
| 109 |
+
i = 1
|
| 110 |
+
|
| 111 |
+
# Load numbered keys (MEGALLM_API_KEY_1, MEGALLM_API_KEY_2, ...)
|
| 112 |
+
while True:
|
| 113 |
+
key = os.environ.get(f"MEGALLM_API_KEY_{i}")
|
| 114 |
+
if not key:
|
| 115 |
+
break
|
| 116 |
+
keys.append(key)
|
| 117 |
+
i += 1
|
| 118 |
+
|
| 119 |
+
# Fallback to single key for backward compatibility
|
| 120 |
+
if not keys:
|
| 121 |
+
single_key = os.environ.get("MEGALLM_API_KEY")
|
| 122 |
+
if single_key:
|
| 123 |
+
keys = [single_key]
|
| 124 |
+
logger.warning(
|
| 125 |
+
"[KeyRotator] Using legacy MEGALLM_API_KEY. "
|
| 126 |
+
"Consider migrating to MEGALLM_API_KEY_1, MEGALLM_API_KEY_2, etc."
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
if keys:
|
| 130 |
+
logger.info(f"[KeyRotator] Loaded {len(keys)} MegaLLM API key(s)")
|
| 131 |
+
else:
|
| 132 |
+
logger.warning("[KeyRotator] No MegaLLM API keys found in environment")
|
| 133 |
+
|
| 134 |
+
return keys
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# Singleton instance for MegaLLM key rotation
|
| 138 |
+
_megallm_keys = load_megallm_keys()
|
| 139 |
+
megallm_key_rotator: KeyRotator | None = None
|
| 140 |
+
|
| 141 |
+
if _megallm_keys:
|
| 142 |
+
megallm_key_rotator = KeyRotator(_megallm_keys, name="MegaLLM")
|
app/shared/integrations/megallm_client.py
CHANGED
|
@@ -1,8 +1,13 @@
|
|
| 1 |
-
"""MegaLLM client using OpenAI-compatible API with retry logic."""
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import httpx
|
| 4 |
|
| 5 |
from app.core.config import settings
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Timeout configuration for DeepSeek reasoning models (can take longer)
|
| 8 |
REQUEST_TIMEOUT = httpx.Timeout(
|
|
@@ -14,13 +19,21 @@ REQUEST_TIMEOUT = httpx.Timeout(
|
|
| 14 |
|
| 15 |
|
| 16 |
class MegaLLMClient:
|
| 17 |
-
"""Client for MegaLLM (OpenAI-compatible API) operations."""
|
| 18 |
|
| 19 |
def __init__(self, model: str | None = None):
|
| 20 |
"""Initialize with optional model override."""
|
| 21 |
self.model = model or settings.default_megallm_model
|
| 22 |
-
self.api_key = settings.megallm_api_key
|
| 23 |
self.base_url = settings.megallm_base_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
async def generate(
|
| 26 |
self,
|
|
@@ -41,8 +54,8 @@ class MegaLLMClient:
|
|
| 41 |
Returns:
|
| 42 |
Generated text
|
| 43 |
"""
|
| 44 |
-
|
| 45 |
-
|
| 46 |
|
| 47 |
messages = []
|
| 48 |
if system_instruction:
|
|
@@ -56,7 +69,7 @@ class MegaLLMClient:
|
|
| 56 |
response = await client.post(
|
| 57 |
f"{self.base_url}/chat/completions",
|
| 58 |
headers={
|
| 59 |
-
"Authorization": f"Bearer {
|
| 60 |
"Content-Type": "application/json",
|
| 61 |
},
|
| 62 |
json={
|
|
@@ -97,8 +110,8 @@ class MegaLLMClient:
|
|
| 97 |
Returns:
|
| 98 |
Generated text response
|
| 99 |
"""
|
| 100 |
-
|
| 101 |
-
|
| 102 |
|
| 103 |
chat_messages = []
|
| 104 |
if system_instruction:
|
|
@@ -114,7 +127,7 @@ class MegaLLMClient:
|
|
| 114 |
response = await client.post(
|
| 115 |
f"{self.base_url}/chat/completions",
|
| 116 |
headers={
|
| 117 |
-
"Authorization": f"Bearer {
|
| 118 |
"Content-Type": "application/json",
|
| 119 |
},
|
| 120 |
json={
|
|
|
|
| 1 |
+
"""MegaLLM client using OpenAI-compatible API with retry logic and key rotation."""
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
|
| 5 |
import httpx
|
| 6 |
|
| 7 |
from app.core.config import settings
|
| 8 |
+
from app.shared.integrations.key_rotator import megallm_key_rotator
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
# Timeout configuration for DeepSeek reasoning models (can take longer)
|
| 13 |
REQUEST_TIMEOUT = httpx.Timeout(
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
class MegaLLMClient:
|
| 22 |
+
"""Client for MegaLLM (OpenAI-compatible API) operations with key rotation."""
|
| 23 |
|
| 24 |
def __init__(self, model: str | None = None):
|
| 25 |
"""Initialize with optional model override."""
|
| 26 |
self.model = model or settings.default_megallm_model
|
|
|
|
| 27 |
self.base_url = settings.megallm_base_url
|
| 28 |
+
|
| 29 |
+
def _get_api_key(self) -> str:
    """Resolve the API key for this request.

    Prefers the round-robin rotator when one is configured; otherwise falls
    back to the single legacy key from settings.

    Returns:
        An API key string.

    Raises:
        ValueError: If neither the rotator nor settings provide a key.
    """
    if megallm_key_rotator is not None:
        return megallm_key_rotator.get_next_key()

    # Fallback to settings (backward compatibility)
    legacy_key = settings.megallm_api_key
    if legacy_key:
        return legacy_key

    raise ValueError("No MegaLLM API keys configured")
|
| 37 |
|
| 38 |
async def generate(
|
| 39 |
self,
|
|
|
|
| 54 |
Returns:
|
| 55 |
Generated text
|
| 56 |
"""
|
| 57 |
+
# Get rotated API key
|
| 58 |
+
api_key = self._get_api_key()
|
| 59 |
|
| 60 |
messages = []
|
| 61 |
if system_instruction:
|
|
|
|
| 69 |
response = await client.post(
|
| 70 |
f"{self.base_url}/chat/completions",
|
| 71 |
headers={
|
| 72 |
+
"Authorization": f"Bearer {api_key}",
|
| 73 |
"Content-Type": "application/json",
|
| 74 |
},
|
| 75 |
json={
|
|
|
|
| 110 |
Returns:
|
| 111 |
Generated text response
|
| 112 |
"""
|
| 113 |
+
# Get rotated API key
|
| 114 |
+
api_key = self._get_api_key()
|
| 115 |
|
| 116 |
chat_messages = []
|
| 117 |
if system_instruction:
|
|
|
|
| 127 |
response = await client.post(
|
| 128 |
f"{self.base_url}/chat/completions",
|
| 129 |
headers={
|
| 130 |
+
"Authorization": f"Bearer {api_key}",
|
| 131 |
"Content-Type": "application/json",
|
| 132 |
},
|
| 133 |
json={
|
tests/test_key_rotation.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unit tests for MegaLLM API Key Rotation.
|
| 2 |
+
|
| 3 |
+
Run with:
|
| 4 |
+
cd /Volumes/WorkSpace/Project/LocalMate/localmate-danang-backend-v2
|
| 5 |
+
python -m pytest tests/test_key_rotation.py -v
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import threading
|
| 10 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 11 |
+
from unittest.mock import patch
|
| 12 |
+
|
| 13 |
+
import pytest
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TestKeyRotator:
|
| 17 |
+
"""Tests for KeyRotator class."""
|
| 18 |
+
|
| 19 |
+
def test_rotation_cycles_through_keys(self):
|
| 20 |
+
"""Verify round-robin cycles through all keys in order."""
|
| 21 |
+
from app.shared.integrations.key_rotator import KeyRotator
|
| 22 |
+
|
| 23 |
+
keys = ["key_1", "key_2", "key_3"]
|
| 24 |
+
rotator = KeyRotator(keys, name="test")
|
| 25 |
+
|
| 26 |
+
# First cycle
|
| 27 |
+
assert rotator.get_next_key() == "key_1"
|
| 28 |
+
assert rotator.get_next_key() == "key_2"
|
| 29 |
+
assert rotator.get_next_key() == "key_3"
|
| 30 |
+
|
| 31 |
+
# Second cycle (should loop back)
|
| 32 |
+
assert rotator.get_next_key() == "key_1"
|
| 33 |
+
assert rotator.get_next_key() == "key_2"
|
| 34 |
+
assert rotator.get_next_key() == "key_3"
|
| 35 |
+
|
| 36 |
+
# Verify request count
|
| 37 |
+
assert rotator.request_count == 6
|
| 38 |
+
|
| 39 |
+
def test_single_key_always_returns_same(self):
|
| 40 |
+
"""Verify single key mode works correctly."""
|
| 41 |
+
from app.shared.integrations.key_rotator import KeyRotator
|
| 42 |
+
|
| 43 |
+
keys = ["only_key"]
|
| 44 |
+
rotator = KeyRotator(keys, name="single")
|
| 45 |
+
|
| 46 |
+
for _ in range(5):
|
| 47 |
+
assert rotator.get_next_key() == "only_key"
|
| 48 |
+
|
| 49 |
+
assert rotator.request_count == 5
|
| 50 |
+
|
| 51 |
+
def test_empty_keys_raises_error(self):
|
| 52 |
+
"""Verify empty keys list raises ValueError."""
|
| 53 |
+
from app.shared.integrations.key_rotator import KeyRotator
|
| 54 |
+
|
| 55 |
+
with pytest.raises(ValueError, match="At least one API key is required"):
|
| 56 |
+
KeyRotator([], name="empty")
|
| 57 |
+
|
| 58 |
+
def test_rotation_thread_safety(self):
|
| 59 |
+
"""Verify rotation is thread-safe under concurrent access."""
|
| 60 |
+
from app.shared.integrations.key_rotator import KeyRotator
|
| 61 |
+
|
| 62 |
+
keys = ["key_1", "key_2", "key_3"]
|
| 63 |
+
rotator = KeyRotator(keys, name="threaded")
|
| 64 |
+
|
| 65 |
+
results = []
|
| 66 |
+
lock = threading.Lock()
|
| 67 |
+
|
| 68 |
+
def get_key():
|
| 69 |
+
key = rotator.get_next_key()
|
| 70 |
+
with lock:
|
| 71 |
+
results.append(key)
|
| 72 |
+
|
| 73 |
+
# Run 100 concurrent requests
|
| 74 |
+
with ThreadPoolExecutor(max_workers=10) as executor:
|
| 75 |
+
futures = [executor.submit(get_key) for _ in range(100)]
|
| 76 |
+
for future in futures:
|
| 77 |
+
future.result()
|
| 78 |
+
|
| 79 |
+
# Should have 100 results
|
| 80 |
+
assert len(results) == 100
|
| 81 |
+
assert rotator.request_count == 100
|
| 82 |
+
|
| 83 |
+
# Each key should be used roughly equally (with some variance due to threading)
|
| 84 |
+
for key in keys:
|
| 85 |
+
count = results.count(key)
|
| 86 |
+
# Should be approximately 33 each, allow 20% variance
|
| 87 |
+
assert 20 <= count <= 45, f"Key {key} used {count} times (expected ~33)"
|
| 88 |
+
|
| 89 |
+
def test_get_stats(self):
|
| 90 |
+
"""Verify stats reporting works."""
|
| 91 |
+
from app.shared.integrations.key_rotator import KeyRotator
|
| 92 |
+
|
| 93 |
+
keys = ["key_1", "key_2"]
|
| 94 |
+
rotator = KeyRotator(keys, name="stats_test")
|
| 95 |
+
|
| 96 |
+
rotator.get_next_key()
|
| 97 |
+
rotator.get_next_key()
|
| 98 |
+
rotator.get_next_key()
|
| 99 |
+
|
| 100 |
+
stats = rotator.get_stats()
|
| 101 |
+
assert stats["name"] == "stats_test"
|
| 102 |
+
assert stats["total_keys"] == 2
|
| 103 |
+
assert stats["total_requests"] == 3
|
| 104 |
+
assert stats["current_index"] == 1 # After 3 requests: 0->1->0->1
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class TestLoadMegaLLMKeys:
|
| 108 |
+
"""Tests for environment-based key loading."""
|
| 109 |
+
|
| 110 |
+
def test_load_numbered_keys(self):
|
| 111 |
+
"""Verify loading MEGALLM_API_KEY_1, _2, _3 format."""
|
| 112 |
+
env_vars = {
|
| 113 |
+
"MEGALLM_API_KEY_1": "first_key",
|
| 114 |
+
"MEGALLM_API_KEY_2": "second_key",
|
| 115 |
+
"MEGALLM_API_KEY_3": "third_key",
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
with patch.dict(os.environ, env_vars, clear=False):
|
| 119 |
+
from importlib import reload
|
| 120 |
+
from app.shared.integrations import key_rotator
|
| 121 |
+
reload(key_rotator)
|
| 122 |
+
|
| 123 |
+
keys = key_rotator.load_megallm_keys()
|
| 124 |
+
assert keys == ["first_key", "second_key", "third_key"]
|
| 125 |
+
|
| 126 |
+
def test_load_fallback_single_key(self):
|
| 127 |
+
"""Verify fallback to MEGALLM_API_KEY (legacy format)."""
|
| 128 |
+
# Clear any numbered keys
|
| 129 |
+
env_vars = {
|
| 130 |
+
"MEGALLM_API_KEY": "legacy_key",
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
# Remove any numbered keys that might exist
|
| 134 |
+
for i in range(1, 10):
|
| 135 |
+
env_vars[f"MEGALLM_API_KEY_{i}"] = ""
|
| 136 |
+
|
| 137 |
+
with patch.dict(os.environ, env_vars, clear=False):
|
| 138 |
+
from importlib import reload
|
| 139 |
+
from app.shared.integrations import key_rotator
|
| 140 |
+
reload(key_rotator)
|
| 141 |
+
|
| 142 |
+
# Note: This test may need adjustment based on environment state
|
| 143 |
+
keys = key_rotator.load_megallm_keys()
|
| 144 |
+
# Should have at least the legacy key if no numbered keys
|
| 145 |
+
assert len(keys) >= 1
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
if __name__ == "__main__":
|
| 149 |
+
pytest.main([__file__, "-v"])
|