Final_Assignment_Template

Sleeping

App Files Files Community

i-dhilip committed on May 15, 2025

Commit

45fb8fa

verified ·

1 Parent(s): 362c28f

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -31

app.py CHANGED Viewed

@@ -54,25 +54,31 @@ class LangGraphAgent:
         if not OPENROUTER_API_KEY:
             raise ValueError("OPENROUTER_API_KEY is not set. Cannot initialize LLM.")
         if llm_choice == "llama":
             self.llm = ChatOpenAI(
-                model="meta-llama/llama-4-maverick:free",
                 api_key=OPENROUTER_API_KEY,
                 base_url="https://openrouter.ai/api/v1",
-                temperature=0.1, # Llama models can be sensitive to temperature
-                # max_tokens=150 # Llama 8B might benefit from a smaller max_token for concise answers
             )
-            print("Initialized Llama 4 Maverick.")
         elif llm_choice == "qwen":
             self.llm = ChatOpenAI(
-                model="qwen/qwq-32b:free",
                 api_key=OPENROUTER_API_KEY,
                 base_url="https://openrouter.ai/api/v1",
-                temperature=0.1 # Adding temperature for consistency
             )
-            print("Initialized Qwen 32B.")
         else:
-            raise ValueError(f"Unsupported LLM choice: {llm_choice}. Choose 'qwen' or 'llama'.")
         self.tools_map = {tool.name: tool for tool in tools}
         self.graph = self._build_graph()
@@ -99,6 +105,10 @@ class LangGraphAgent:
     def _should_call_tools(self, state: AgentState) -> str:
         print("LLM deciding next step...")
         last_message = state["messages"][-1]
         if hasattr(last_message, "tool_calls") and last_message.tool_calls:
             print(f"LLM decided to call tools: {last_message.tool_calls}")
@@ -107,10 +117,15 @@ class LangGraphAgent:
         return "end"
     def _call_llm(self, state: AgentState) -> Dict[str, Any]:
-        print("Calling LLM...")
-        # Bind tools to LLM for function calling
-        llm_with_tools = self.llm.bind_tools(tools)
-        response = llm_with_tools.invoke(state["messages"])
         print(f"LLM response: {response.content[:100]}...")
         return {"messages": [response]}
@@ -165,26 +180,45 @@ class LangGraphAgent:
             if final_graph_state and final_graph_state["messages"]:
                 for msg in reversed(final_graph_state["messages"]):
-                    if isinstance(msg, AIMessage) and not msg.tool_calls:
                         answer = msg.content.strip()
                         # Remove common prefixes that LLMs might add despite instructions
                         prefixes_to_remove = [
                             "FINAL ANSWER:", "The answer is", "Here is the answer:",
-                            "The final answer is", "Answer:", "Solution:"
                         ]
                         for prefix in prefixes_to_remove:
-                            if answer.upper().startswith(prefix.upper()):
                                 answer = answer[len(prefix):].strip()
-                        # Remove potential quotation marks if the answer is a single word/phrase
-                        if len(answer.split()) < 5: # Heuristic for short answers
-                            if answer.startswith(('"', "'")) and answer.endswith(('"', "'")):
-                                answer = answer[1:-1]
                         print(f"Agent returning answer: {answer}")
                         return answer
-                print("No suitable AI message found for final answer. Returning last message content.")
-                return str(final_graph_state["messages"][-1].content) if final_graph_state["messages"] else "Error: No messages in final state."
             else:
                 print("Error: Agent did not reach a final state or no messages found.")
                 return "Error: Agent did not produce a conclusive answer."
@@ -196,7 +230,7 @@ class LangGraphAgent:
             return f"Error during agent execution: {e}"
 # --- Main Evaluation Logic (Modified from starter) ---
-def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the LangGraphAgent on them, submits all answers,
     and displays the results.
@@ -212,17 +246,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not OPENROUTER_API_KEY:
          return "Error: OPENROUTER_API_KEY not found. Please set it in your .env file.", None
-    if not TAVILY_API_KEY:
-        print("Warning: TAVILY_API_KEY not found. Tavily search might not work as expected.")
-        # return "Error: TAVILY_API_KEY not found. Please set it in your .env file.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     try:
-        # Default to Llama for now, can be made configurable later (e.g., via Gradio input)
-        agent = LangGraphAgent(llm_choice="llama")
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -315,7 +347,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
-# --- Gradio Interface (Mostly as provided) ---
 with gr.Blocks() as demo:
     gr.Markdown("# LangGraph GAIA Agent Evaluation Runner")
     gr.Markdown(
@@ -325,20 +356,28 @@ with gr.Blocks() as demo:
         2.  **Create a `.env` file** in the root of your space with your API keys:
             ```
             OPENROUTER_API_KEY="your_openrouter_api_key"
-            TAVILY_API_KEY="your_tavily_api_key" # Optional, but recommended for TavilySearch tool
             ```
         3.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        4.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
         -   Ensure your Hugging Face Space is public for the `agent_code` link to be verifiable.
         -   Submitting all answers can take some time as the agent processes each question.
-        -   This agent uses LangGraph with TavilySearch, Wikipedia, Arxiv, and Qwen via OpenRouter.
         """
     )
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
@@ -346,6 +385,7 @@ with gr.Blocks() as demo:
     run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )

         if not OPENROUTER_API_KEY:
             raise ValueError("OPENROUTER_API_KEY is not set. Cannot initialize LLM.")
+        self.llm_choice = llm_choice
+        self.supports_tool_calling = False # Default to false
         if llm_choice == "llama":
             self.llm = ChatOpenAI(
+                model="meta-llama/llama-3.1-8b-instruct:free", # Corrected to Llama 3.1 as per user's earlier request
                 api_key=OPENROUTER_API_KEY,
                 base_url="https://openrouter.ai/api/v1",
+                temperature=0.1,
             )
+            # Llama 3.1 8B on OpenRouter might not support tool calling via the OpenAI SDK binding method
+            self.supports_tool_calling = False
+            print("Initialized Llama 3.1 8B Instruct (tool calling assumed NOT supported).")
         elif llm_choice == "qwen":
             self.llm = ChatOpenAI(
+                model="qwen/qwen-2-7b-instruct:free", # Using a Qwen-2 model as qwq-32b might be older
                 api_key=OPENROUTER_API_KEY,
                 base_url="https://openrouter.ai/api/v1",
+                temperature=0.1
             )
+            # Qwen models on OpenRouter might not support tool calling via the OpenAI SDK binding method
+            self.supports_tool_calling = False
+            print("Initialized Qwen-2 7B Instruct (tool calling assumed NOT supported).")
         else:
+            raise ValueError(f"Unsupported LLM choice: {llm_choice}. Choose 'llama', or 'qwen'.")
         self.tools_map = {tool.name: tool for tool in tools}
         self.graph = self._build_graph()
     def _should_call_tools(self, state: AgentState) -> str:
         print("LLM deciding next step...")
+        if not self.supports_tool_calling:
+            print("Tool calling not supported by the current LLM. Ending interaction.")
+            return "end"
         last_message = state["messages"][-1]
         if hasattr(last_message, "tool_calls") and last_message.tool_calls:
             print(f"LLM decided to call tools: {last_message.tool_calls}")
         return "end"
     def _call_llm(self, state: AgentState) -> Dict[str, Any]:
+        print(f"Calling LLM ({self.llm_choice})...")
+        if self.supports_tool_calling:
+            print("Binding tools to LLM for function calling.")
+            llm_with_tools = self.llm.bind_tools(tools)
+            response = llm_with_tools.invoke(state["messages"])
+        else:
+            print("Invoking LLM without binding tools.")
+            response = self.llm.invoke(state["messages"])
         print(f"LLM response: {response.content[:100]}...")
         return {"messages": [response]}
             if final_graph_state and final_graph_state["messages"]:
                 for msg in reversed(final_graph_state["messages"]):
+                    if isinstance(msg, AIMessage) and not msg.tool_calls and msg.content: # Ensure content exists
                         answer = msg.content.strip()
+                        if not answer: # Skip empty answers after initial stripping
+                            continue
                         # Remove common prefixes that LLMs might add despite instructions
                         prefixes_to_remove = [
                             "FINAL ANSWER:", "The answer is", "Here is the answer:",
+                            "The final answer is", "Answer:", "Solution:",
+                            "The direct answer is", "Here's the concise answer:",
+                            "Here you go:", "Certainly, the answer is"
                         ]
                         for prefix in prefixes_to_remove:
+                            # Case-insensitive prefix removal
+                            if answer.lower().startswith(prefix.lower()):
                                 answer = answer[len(prefix):].strip()
+                        # More robust quote stripping
+                        if answer.startswith(("\"", "'")) and answer.endswith(("\"", "'")):
+                            temp_answer = answer[1:-1]
+                            # Avoid stripping if it's a legitimately quoted string like "'quoted string'" as the answer itself
+                            if not (temp_answer.startswith(("\"", "'")) and temp_answer.endswith(("\"", "'"))):
+                                answer = temp_answer
+                        if not answer: # Check again if answer became empty after stripping
+                            continue
                         print(f"Agent returning answer: {answer}")
                         return answer
+                # Refined fallback logic
+                print("No suitable AI message with valid content found after processing. Attempting to return last raw AI message if available.")
+                last_ai_msg_content = next((m.content.strip() for m in reversed(final_graph_state["messages"]) if isinstance(m, AIMessage) and m.content and not m.tool_calls), None)
+                if last_ai_msg_content:
+                     print(f"Agent returning last raw AI message as fallback: {last_ai_msg_content}")
+                     return last_ai_msg_content
+                print("No suitable AI message found for final answer, even as fallback.")
+                return "Error: Agent could not extract a valid answer." # More specific error
             else:
                 print("Error: Agent did not reach a final state or no messages found.")
                 return "Error: Agent did not produce a conclusive answer."
             return f"Error during agent execution: {e}"
 # --- Main Evaluation Logic (Modified from starter) ---
+def run_and_submit_all(profile: gr.OAuthProfile | None, llm_model_choice: str):
     """
     Fetches all questions, runs the LangGraphAgent on them, submits all answers,
     and displays the results.
     if not OPENROUTER_API_KEY:
          return "Error: OPENROUTER_API_KEY not found. Please set it in your .env file.", None
+    # TAVILY_API_KEY check is handled by the tool initialization itself with a warning.
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    print(f"Attempting to initialize agent with LLM: {llm_model_choice}")
     try:
+        agent = LangGraphAgent(llm_choice=llm_model_choice)
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
 with gr.Blocks() as demo:
     gr.Markdown("# LangGraph GAIA Agent Evaluation Runner")
     gr.Markdown(
         2.  **Create a `.env` file** in the root of your space with your API keys:
             ```
             OPENROUTER_API_KEY="your_openrouter_api_key"
+            TAVILY_API_KEY="your_tavily_api_key" # Optional, but TavilySearch tool won't work without it
             ```
         3.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        4.  **Select the LLM model** you want the agent to use.
+        5.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
         -   Ensure your Hugging Face Space is public for the `agent_code` link to be verifiable.
         -   Submitting all answers can take some time as the agent processes each question.
+        -   The agent will use the selected LLM. Note that only some models (e.g., llama) support tool/function calling. If a model without tool support is chosen for a task requiring tools, it may not perform optimally or might not use tools.
         """
     )
     gr.LoginButton()
+    llm_choice_dropdown = gr.Dropdown(
+        choices=["llama", "qwen"],
+        value="llama", # Default selection; note that both models are currently initialized with supports_tool_calling = False
+        label="Select LLM Model",
+        info="Choose the Large Language Model for the agent."
+    )
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     run_button.click(
         fn=run_and_submit_all,
+        inputs=[llm_choice_dropdown], # Add llm_choice_dropdown as an input
         outputs=[status_output, results_table]
     )