fix nvidia
- __pycache__/agent.cpython-312.pyc +0 -0
- agent.py +5 -2
- verify_fixes.py +72 -0
__pycache__/agent.cpython-312.pyc
CHANGED
Binary files a/__pycache__/agent.cpython-312.pyc and b/__pycache__/agent.cpython-312.pyc differ
agent.py
CHANGED

@@ -73,7 +73,7 @@ gemini_model = ChatGoogleGenerativeAI(

 # NVIDIA Model (Secondary Fallback)
 nvidia_model = ChatOpenAI(
-    model="
+    model="meta/llama-3.1-405b-instruct",
     openai_api_key=os.getenv("NVIDIA_API_KEY"),
     openai_api_base="https://integrate.api.nvidia.com/v1",
     temperature=0,

@@ -138,8 +138,11 @@ def smart_invoke(msgs, use_tools=False, start_tier=0):

             # Catch other fallback triggers
             if any(x in err_str for x in ["rate_limit", "429", "500", "503", "overloaded", "not_found", "404", "402", "credits"]):
-                print(f"--- {tier['name']} Error: {e}.
+                print(f"--- {tier['name']} Error: {e}. Trying next model/tier... ---")
                 last_exception = e
+                # If this tier has more alternatives, continue to the next one
+                if current_model != models_to_try[-1]:
+                    continue
                 break  # Move to next tier
             raise e

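For context, the continue/break logic added above only makes sense inside a loop over per-tier model alternatives. The sketch below shows one plausible shape for that loop; the tier list, the alternative Gemini model name (taken from verify_fixes.py), and the overall loop structure are assumptions, since only the lines in the hunks above are confirmed by this commit.

# Hypothetical sketch of the smart_invoke fallback structure this patch assumes.
# openrouter_model, gemini_model, nvidia_model and ChatGoogleGenerativeAI are the
# module-level objects referenced elsewhere in agent.py; everything else here is
# illustrative, not the actual implementation.
def smart_invoke(msgs, use_tools=False, start_tier=0):
    tiers = [
        {"name": "OpenRouter", "models": [openrouter_model]},
        {"name": "Gemini", "models": [gemini_model,
                                      ChatGoogleGenerativeAI(model="gemini-2.5-flash-lite", temperature=0)]},
        {"name": "NVIDIA", "models": [nvidia_model]},
    ]
    last_exception = None
    for tier_idx in range(start_tier, len(tiers)):
        tier = tiers[tier_idx]
        models_to_try = tier["models"]
        for current_model in models_to_try:
            try:
                return current_model.invoke(msgs), tier_idx  # tool binding omitted in this sketch
            except Exception as e:
                err_str = str(e).lower()
                # Catch fallback triggers (list abbreviated here)
                if any(x in err_str for x in ["rate_limit", "429", "500", "503", "overloaded"]):
                    print(f"--- {tier['name']} Error: {e}. Trying next model/tier... ---")
                    last_exception = e
                    # If this tier has more alternatives, continue to the next one
                    if current_model != models_to_try[-1]:
                        continue
                    break  # Move to next tier
                raise e
    raise last_exception

Under this shape, a rate-limited model that is not the last alternative in its tier falls through to the next alternative, while the last alternative breaks out so the outer loop advances to the next tier, which is the behavior verify_fixes.py exercises.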
verify_fixes.py
ADDED

@@ -0,0 +1,72 @@
+import os
+import sys
+from unittest.mock import MagicMock, patch
+
+# Mocking modules
+sys.modules['cv2'] = MagicMock()
+sys.modules['whisper'] = MagicMock()
+
+# Set dummy env vars
+os.environ["OPENROUTER_API_KEY"] = "dummy"
+os.environ["GOOGLE_API_KEY"] = "dummy"
+os.environ["GROQ_API_KEY"] = "dummy"
+os.environ["NVIDIA_API_KEY"] = "dummy"
+os.environ["VERCEL_API_KEY"] = "dummy"
+
+sys.path.append(os.getcwd())
+
+import agent
+from langchain_core.messages import HumanMessage
+
+def test_gemini_alternatives_on_rate_limit():
+    print("Testing Gemini alternatives on rate limit...")
+
+    # We need to mock ChatGoogleGenerativeAI to simulate rate limit on one instance but success on another
+    # Since they are created inside smart_invoke, we patch the class constructor or just the instances if we can
+
+    with patch('agent.openrouter_model.invoke') as mock_openrouter, \
+         patch('agent.ChatGoogleGenerativeAI') as mock_gemini_class:
+
+        # OpenRouter fails
+        mock_openrouter.side_effect = Exception("Rate limit (429)")
+
+        # First Gemini call (primary) fails with rate limit
+        # Second Gemini call (alternative) succeeds
+        mock_primary = MagicMock()
+        mock_primary.invoke.side_effect = Exception("Rate limit (429)")
+        mock_primary.model = "gemini-2.5-flash"
+
+        mock_alt = MagicMock()
+        mock_alt.invoke.return_value = MagicMock(content="Gemini alternative response")
+        mock_alt.model = "gemini-2.5-flash-lite"
+
+        # Control the sequence of ChatGoogleGenerativeAI creation
+        # agent.py creates gemini_model at top level, then potentially more in smart_invoke
+        mock_gemini_class.side_effect = [mock_alt]  # The one created in the loop
+
+        # We also need to mock the already created gemini_model
+        with patch('agent.gemini_model', mock_primary):
+            msgs = [HumanMessage(content="Hello")]
+            response, tier_idx = agent.smart_invoke(msgs, use_tools=False)
+
+            print(f"Response from tier {tier_idx}: {response.content}")
+            # Tier 1 is Gemini
+            assert tier_idx == 1
+            assert response.content == "Gemini alternative response"
+            print("Gemini alternative on rate limit successful!")
+
+def test_nvidia_name():
+    print("Checking NVIDIA model name...")
+    assert agent.nvidia_model.model_name == "meta/llama-3.1-405b-instruct"
+    print("NVIDIA model name is correct!")
+
+if __name__ == "__main__":
+    try:
+        test_gemini_alternatives_on_rate_limit()
+        test_nvidia_name()
+        print("All fix tests passed!")
+    except Exception as e:
+        print(f"Test failed: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
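Assuming the Space's dependencies (langchain_core and the packages imported by agent.py) are installed, the script can presumably be run from the repository root; per its own __main__ block it prints "All fix tests passed!" on success and prints a traceback and exits non-zero on failure:

python verify_fixes.py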