Paperbag committed on
Commit
825865b
·
1 Parent(s): 5b69a26

feat: Reorder `smart_invoke` fallback to OpenRouter-Gemini-Groq, add adaptive Gemini model selection, and persist API tier for subsequent calls.

Browse files
.gitignore CHANGED
@@ -1,2 +1,4 @@
1
  .env
2
- .cursorignore
 
 
 
1
  .env
2
+ .cursorignore
3
+ .venv_old
4
+ .venv
__pycache__/agent.cpython-312.pyc CHANGED
Binary files a/__pycache__/agent.cpython-312.pyc and b/__pycache__/agent.cpython-312.pyc differ
 
__pycache__/agent.cpython-39.pyc CHANGED
Binary files a/__pycache__/agent.cpython-39.pyc and b/__pycache__/agent.cpython-39.pyc differ
 
agent.py CHANGED
@@ -47,6 +47,7 @@ load_dotenv()
47
  # huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
48
  # )
49
 
 
50
  model = ChatGroq(
51
  model="meta-llama/llama-4-scout-17b-16e-instruct",
52
  temperature=0,
@@ -70,6 +71,23 @@ gemini_model = ChatGoogleGenerativeAI(
70
  temperature=0,
71
  )
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def smart_invoke(msgs, use_tools=False, start_tier=0):
74
  """
75
  Tiered fallback: OpenRouter -> Gemini -> Groq.
@@ -78,14 +96,18 @@ def smart_invoke(msgs, use_tools=False, start_tier=0):
78
  primary = model_with_tools if use_tools else model
79
  secondary = openrouter_with_tools if use_tools else openrouter_model
80
  tertiary = gemini_with_tools if use_tools else gemini_model
 
 
81
 
82
  # Adaptive Gemini names to try if 1.5 flash is 404
83
- gemini_alternatives = ["gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-3.1-flash-lite", "gemini-3-flash"]
84
 
85
  tiers = [
86
  {"name": "OpenRouter", "model": secondary, "key": "OPENROUTER_API_KEY"},
87
  {"name": "Gemini", "model": tertiary, "key": "GOOGLE_API_KEY", "alternatives": gemini_alternatives},
88
  {"name": "Groq", "model": primary, "key": "GROQ_API_KEY"},
 
 
89
  ]
90
 
91
  last_exception = None
@@ -410,6 +432,8 @@ tools_by_name = {tool.name: tool for tool in tools}
410
  model_with_tools = model.bind_tools(tools)
411
  openrouter_with_tools = openrouter_model.bind_tools(tools)
412
  gemini_with_tools = gemini_model.bind_tools(tools)
 
 
413
 
414
  def answer_message(state: AgentState) -> AgentState:
415
  messages = state["messages"]
 
47
  # huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
48
  # )
49
 
50
+ # Groq Model (Primary)
51
  model = ChatGroq(
52
  model="meta-llama/llama-4-scout-17b-16e-instruct",
53
  temperature=0,
 
71
  temperature=0,
72
  )
73
 
74
# NVIDIA Model (Secondary Fallback)
# Uses NVIDIA NIM's OpenAI-compatible endpoint via the ChatOpenAI client.
nvidia_model = ChatOpenAI(
    model="nvidia/llama-3.1-405b-instruct",
    openai_api_key=os.getenv("NVIDIA_API_KEY"),
    openai_api_base="https://integrate.api.nvidia.com/v1",
    temperature=0,
)

# Vercel Model (Tertiary Fallback)
# Fix: Vercel AI Gateway's OpenAI-compatible API is served from
# https://ai-gateway.vercel.sh/v1 — the previous host
# (gateway.ai.vercel.com) is not that endpoint, so this tier would
# fail on every call. NOTE(review): confirm against current Vercel
# AI Gateway docs; adjust model/base_url for a custom gateway setup.
vercel_model = ChatOpenAI(
    model="meta-llama/llama-3.3-70b-instruct",
    openai_api_key=os.getenv("VERCEL_API_KEY"),
    openai_api_base="https://ai-gateway.vercel.sh/v1",
    temperature=0,
)
90
+
91
  def smart_invoke(msgs, use_tools=False, start_tier=0):
92
  """
93
  Tiered fallback: OpenRouter -> Gemini -> Groq -> NVIDIA -> Vercel.
 
96
  primary = model_with_tools if use_tools else model
97
  secondary = openrouter_with_tools if use_tools else openrouter_model
98
  tertiary = gemini_with_tools if use_tools else gemini_model
99
+ quaternary = nvidia_with_tools if use_tools else nvidia_model
100
+ quinary = vercel_with_tools if use_tools else vercel_model
101
 
102
  # Adaptive Gemini names to try if 1.5 flash is 404
103
+ gemini_alternatives = ["gemini-2.5-flash-lite", "gemma-3-1b", "gemini-3-flash", "gemini-3.1-flash-lite"]
104
 
105
  tiers = [
106
  {"name": "OpenRouter", "model": secondary, "key": "OPENROUTER_API_KEY"},
107
  {"name": "Gemini", "model": tertiary, "key": "GOOGLE_API_KEY", "alternatives": gemini_alternatives},
108
  {"name": "Groq", "model": primary, "key": "GROQ_API_KEY"},
109
+ {"name": "NVIDIA", "model": quaternary, "key": "NVIDIA_API_KEY"},
110
+ {"name": "Vercel", "model": quinary, "key": "VERCEL_API_KEY"},
111
  ]
112
 
113
  last_exception = None
 
432
  model_with_tools = model.bind_tools(tools)
433
  openrouter_with_tools = openrouter_model.bind_tools(tools)
434
  gemini_with_tools = gemini_model.bind_tools(tools)
435
+ nvidia_with_tools = nvidia_model.bind_tools(tools)
436
+ vercel_with_tools = vercel_model.bind_tools(tools)
437
 
438
  def answer_message(state: AgentState) -> AgentState:
439
  messages = state["messages"]
test_out.txt ADDED
Binary file (5.51 kB). View file
 
verify_fallback.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import sys
from unittest.mock import MagicMock, patch

# Stub out heavy optional dependencies before agent is imported.
sys.modules['cv2'] = MagicMock()
sys.modules['whisper'] = MagicMock()

# agent reads these keys at import time, so populate them first.
for _key in (
    "OPENROUTER_API_KEY",
    "GOOGLE_API_KEY",
    "GROQ_API_KEY",
    "NVIDIA_API_KEY",
    "VERCEL_API_KEY",
):
    os.environ[_key] = "dummy"

# Make the project root importable.
sys.path.append(os.getcwd())

import agent
from langchain_core.messages import HumanMessage


def test_fallback_logic():
    """Drive smart_invoke through simulated rate-limit failures.

    Tier order under test: 0:OpenRouter, 1:Gemini, 2:Groq, 3:NVIDIA,
    4:Vercel. Each phase fails every tier below the expected one and
    checks the returned (response, tier index) pair.
    """
    print("Testing fallback logic...")

    with patch('agent.openrouter_model.invoke') as or_invoke, \
         patch('agent.gemini_model.invoke') as gem_invoke, \
         patch('agent.model.invoke') as groq_invoke, \
         patch('agent.nvidia_model.invoke') as nv_invoke, \
         patch('agent.vercel_model.invoke') as vc_invoke:

        # Phase 1: the first three tiers are rate-limited; NVIDIA answers.
        for failing in (or_invoke, gem_invoke, groq_invoke):
            failing.side_effect = Exception("Rate limit (429)")
        nv_invoke.return_value = MagicMock(content="NVIDIA response")

        msgs = [HumanMessage(content="Hello")]
        response, tier_idx = agent.smart_invoke(msgs, use_tools=False)

        print(f"Response from tier {tier_idx}: {response.content}")
        assert tier_idx == 3
        assert response.content == "NVIDIA response"
        print("Fallback to NVIDIA successful!")

        # Phase 2: NVIDIA fails too; Vercel is the last resort.
        nv_invoke.side_effect = Exception("Rate limit (429)")
        vc_invoke.return_value = MagicMock(content="Vercel response")

        response, tier_idx = agent.smart_invoke(msgs, use_tools=False)
        print(f"Response from tier {tier_idx}: {response.content}")
        assert tier_idx == 4
        assert response.content == "Vercel response"
        print("Fallback to Vercel successful!")


if __name__ == "__main__":
    try:
        test_fallback_logic()
        print("All fallback tests passed!")
    except Exception as e:
        print(f"Test failed: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
verify_simple.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import sys
from unittest.mock import MagicMock

# Stub out heavy optional dependencies before agent is imported.
sys.modules['cv2'] = MagicMock()
sys.modules['whisper'] = MagicMock()

# agent reads these keys at import time; dummy values suffice because
# this script never performs a real API call.
for _key in (
    "OPENROUTER_API_KEY",
    "GOOGLE_API_KEY",
    "GROQ_API_KEY",
    "NVIDIA_API_KEY",
    "VERCEL_API_KEY",
):
    os.environ[_key] = "dummy"

# Make the project root importable.
sys.path.append(os.getcwd())

import agent


def verify_tiers():
    """Smoke-check the new fallback models on the agent module.

    smart_invoke builds its tier list as a local variable, so the only
    thing checkable from outside is that the module-level model objects
    exist and expose an ``invoke`` method.
    """
    # Fix: removed a dead `from langchain_core.messages import HumanMessage`
    # that was never used in this function.
    print(f"NVIDIA model initialized: {agent.nvidia_model is not None}")
    print(f"Vercel model initialized: {agent.vercel_model is not None}")

    print(f"NVIDIA model hasattr invoke: {hasattr(agent.nvidia_model, 'invoke')}")
    print(f"Vercel model hasattr invoke: {hasattr(agent.vercel_model, 'invoke')}")


if __name__ == "__main__":
    verify_tiers()