Paperbag committed on
Commit
53e9378
·
1 Parent(s): 60d960d

feat: Upgrade Gemini model, reorder model fallback tiers, enhance error handling, and add image tool forcing with a new test.

Browse files
__pycache__/agent.cpython-312.pyc CHANGED
Binary files a/__pycache__/agent.cpython-312.pyc and b/__pycache__/agent.cpython-312.pyc differ
 
agent.py CHANGED
@@ -55,7 +55,7 @@ model = ChatGroq(
55
  max_retries=2,
56
  )
57
 
58
- # OpenRouter Fallback Model (used when Groq hits rate limits)
59
  openrouter_model = ChatOpenAI(
60
  model="meta-llama/llama-3.3-70b-instruct",
61
  openai_api_key=os.getenv("OPENROUTER_API_KEY"),
@@ -65,7 +65,7 @@ openrouter_model = ChatOpenAI(
65
 
66
  # Google AI Studio Fallback Model (Gemini)
67
  gemini_model = ChatGoogleGenerativeAI(
68
- model="gemini-1.5-pro",
69
  # google_api_key is automatically picked up from GOOGLE_API_KEY environment variable
70
  temperature=0,
71
  )
@@ -80,9 +80,9 @@ def smart_invoke(msgs, use_tools=False):
80
  tertiary = gemini_with_tools if use_tools else gemini_model
81
 
82
  tiers = [
83
- {"name": "Groq", "model": primary, "key": "GROQ_API_KEY"},
84
  {"name": "OpenRouter", "model": secondary, "key": "OPENROUTER_API_KEY"},
85
  {"name": "Gemini", "model": tertiary, "key": "GOOGLE_API_KEY"},
 
86
  ]
87
 
88
  last_exception = None
@@ -94,8 +94,8 @@ def smart_invoke(msgs, use_tools=False):
94
  return tier["model"].invoke(msgs)
95
  except Exception as e:
96
  err_str = str(e).lower()
97
- # Catch rate limits or generic temporary server failures
98
- if any(x in err_str for x in ["rate_limit", "429", "500", "503", "overloaded"]):
99
  print(f"--- {tier['name']} Error: {e}. Falling back... ---")
100
  last_exception = e
101
  continue
@@ -173,8 +173,13 @@ def analyze_image(image_path: str, question: str) -> str:
173
  with open(image_path, "rb") as image_file:
174
  encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
175
 
176
- # Create a separate Vision LLM call specific to the image
177
- vision_model = ChatGroq(model="llama-3.2-90b-vision-preview", temperature=0)
 
 
 
 
 
178
 
179
  message = HumanMessage(
180
  content=[
@@ -411,6 +416,11 @@ def answer_message(state: AgentState) -> AgentState:
411
  """)]
412
  messages = prompt + messages
413
 
 
 
 
 
 
414
  # Multi-step ReAct Loop (Up to 8 reasoning steps)
415
  max_steps = 8
416
  draft_response = None
 
55
  max_retries=2,
56
  )
57
 
58
+ # OpenRouter Model (Primary Fallback)
59
  openrouter_model = ChatOpenAI(
60
  model="meta-llama/llama-3.3-70b-instruct",
61
  openai_api_key=os.getenv("OPENROUTER_API_KEY"),
 
65
 
66
  # Google AI Studio Fallback Model (Gemini)
67
  gemini_model = ChatGoogleGenerativeAI(
68
+ model="gemini-2.5-flash",
69
  # google_api_key is automatically picked up from GOOGLE_API_KEY environment variable
70
  temperature=0,
71
  )
 
80
  tertiary = gemini_with_tools if use_tools else gemini_model
81
 
82
  tiers = [
 
83
  {"name": "OpenRouter", "model": secondary, "key": "OPENROUTER_API_KEY"},
84
  {"name": "Gemini", "model": tertiary, "key": "GOOGLE_API_KEY"},
85
+ {"name": "Groq", "model": primary, "key": "GROQ_API_KEY"},
86
  ]
87
 
88
  last_exception = None
 
94
  return tier["model"].invoke(msgs)
95
  except Exception as e:
96
  err_str = str(e).lower()
97
+ # Catch rate limits, generic temporary server failures, or missing models
98
+ if any(x in err_str for x in ["rate_limit", "429", "500", "503", "overloaded", "not_found", "404"]):
99
  print(f"--- {tier['name']} Error: {e}. Falling back... ---")
100
  last_exception = e
101
  continue
 
173
  with open(image_path, "rb") as image_file:
174
  encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
175
 
176
+ # Use OpenRouter for Vision as a more robust fallback
177
+ vision_model = ChatOpenAI(
178
+ model="google/gemini-2.0-flash-001",
179
+ openai_api_key=os.getenv("OPENROUTER_API_KEY"),
180
+ openai_api_base="https://openrouter.ai/api/v1",
181
+ temperature=0,
182
+ )
183
 
184
  message = HumanMessage(
185
  content=[
 
416
  """)]
417
  messages = prompt + messages
418
 
419
+ # Force tool usage if image path is detected
420
+ for msg in state["messages"]:
421
+ if isinstance(msg, HumanMessage) and "[Attached File Local Path:" in msg.content:
422
+ messages.append(HumanMessage(content="IMPORTANT: I see an image path in the message. I MUST call the analyze_image tool IMMEDIATELY in my next step to see it."))
423
+
424
  # Multi-step ReAct Loop (Up to 8 reasoning steps)
425
  max_steps = 8
426
  draft_response = None
app copy.py CHANGED
@@ -57,7 +57,7 @@ questions_url = f"{DEFAULT_API_URL}/questions"
57
  response = requests.get(questions_url, timeout=15)
58
  response.raise_for_status()
59
  questions_data = response.json()
60
- for item in questions_data[:5]:
61
  question_text = item.get("question")
62
  if question_text is None:
63
  continue
 
57
  response = requests.get(questions_url, timeout=15)
58
  response.raise_for_status()
59
  questions_data = response.json()
60
+ for item in questions_data[3:4]:
61
  question_text = item.get("question")
62
  if question_text is None:
63
  continue
test_image_tool.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import traceback

from agent import build_graph
from langchain_core.messages import HumanMessage, ToolMessage
from dotenv import load_dotenv

load_dotenv()


def test_image_process():
    """Smoke-test: the agent should call the analyze_image tool for an attached image.

    Sends a question containing an "[Attached File Local Path: ...]" marker,
    then walks the resulting message history to report which tools were
    called, what they returned, and whether a FINAL ANSWER was produced.

    Raises:
        Exception: re-raises whatever the graph invocation raised, so the
            script exits non-zero on failure instead of silently "passing".
    """
    graph = build_graph()
    question = (
        "Review the chess position in the image: "
        "[Attached File Local Path: C:\\Users\\Admin\\.cache\\huggingface\\hub"
        "\\datasets--gaia-benchmark--GAIA\\snapshots"
        "\\682dd723ee1e1697e00360edccf2366dc8418dd9\\2023\\validation"
        "\\cca530fc-4052-43b2-b130-b30968d8aa44.png]"
    )

    print(f"--- Testing with question: {question} ---")
    try:
        result = graph.invoke({"messages": [HumanMessage(content=question)]})

        # Log the conversation flow: tool calls, tool outputs, final answer.
        for msg in result["messages"]:
            if hasattr(msg, "tool_calls") and msg.tool_calls:
                # Report every tool call, not just the first one — a run may
                # invoke several tools before answering.
                for call in msg.tool_calls:
                    print(f"Model called tool: {call['name']}")
            elif isinstance(msg, ToolMessage):
                print(f"Tool returned: {msg.content[:100]}...")
            elif hasattr(msg, "content") and msg.content:
                if "FINAL ANSWER" in msg.content:
                    print(f"Final Answer Found: {msg.content}")

    except Exception:
        # Surface the failure instead of swallowing it: a test that only
        # prints the error and returns can never actually fail.
        traceback.print_exc()
        raise


if __name__ == "__main__":
    test_image_process()