i-dhilip committed on
Commit
45fb8fa
·
verified ·
1 Parent(s): 362c28f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -31
app.py CHANGED
@@ -54,25 +54,31 @@ class LangGraphAgent:
54
  if not OPENROUTER_API_KEY:
55
  raise ValueError("OPENROUTER_API_KEY is not set. Cannot initialize LLM.")
56
 
 
 
 
57
  if llm_choice == "llama":
58
  self.llm = ChatOpenAI(
59
- model="meta-llama/llama-4-maverick:free",
60
  api_key=OPENROUTER_API_KEY,
61
  base_url="https://openrouter.ai/api/v1",
62
- temperature=0.1, # Llama models can be sensitive to temperature
63
- # max_tokens=150 # Llama 8B might benefit from a smaller max_token for concise answers
64
  )
65
- print("Initialized Llama 4 Maverick.")
 
 
66
  elif llm_choice == "qwen":
67
  self.llm = ChatOpenAI(
68
- model="qwen/qwq-32b:free",
69
  api_key=OPENROUTER_API_KEY,
70
  base_url="https://openrouter.ai/api/v1",
71
- temperature=0.1 # Adding temperature for consistency
72
  )
73
- print("Initialized Qwen 32B.")
 
 
74
  else:
75
- raise ValueError(f"Unsupported LLM choice: {llm_choice}. Choose 'qwen' or 'llama'.")
76
 
77
  self.tools_map = {tool.name: tool for tool in tools}
78
  self.graph = self._build_graph()
@@ -99,6 +105,10 @@ class LangGraphAgent:
99
 
100
  def _should_call_tools(self, state: AgentState) -> str:
101
  print("LLM deciding next step...")
 
 
 
 
102
  last_message = state["messages"][-1]
103
  if hasattr(last_message, "tool_calls") and last_message.tool_calls:
104
  print(f"LLM decided to call tools: {last_message.tool_calls}")
@@ -107,10 +117,15 @@ class LangGraphAgent:
107
  return "end"
108
 
109
  def _call_llm(self, state: AgentState) -> Dict[str, Any]:
110
- print("Calling LLM...")
111
- # Bind tools to LLM for function calling
112
- llm_with_tools = self.llm.bind_tools(tools)
113
- response = llm_with_tools.invoke(state["messages"])
 
 
 
 
 
114
  print(f"LLM response: {response.content[:100]}...")
115
  return {"messages": [response]}
116
 
@@ -165,26 +180,45 @@ class LangGraphAgent:
165
 
166
  if final_graph_state and final_graph_state["messages"]:
167
  for msg in reversed(final_graph_state["messages"]):
168
- if isinstance(msg, AIMessage) and not msg.tool_calls:
169
  answer = msg.content.strip()
 
 
 
170
  # Remove common prefixes that LLMs might add despite instructions
171
  prefixes_to_remove = [
172
  "FINAL ANSWER:", "The answer is", "Here is the answer:",
173
- "The final answer is", "Answer:", "Solution:"
 
 
174
  ]
175
  for prefix in prefixes_to_remove:
176
- if answer.upper().startswith(prefix.upper()):
 
177
  answer = answer[len(prefix):].strip()
178
 
179
- # Remove potential quotation marks if the answer is a single word/phrase
180
- if len(answer.split()) < 5: # Heuristic for short answers
181
- if answer.startswith(('"', "'")) and answer.endswith(('"', "'")):
182
- answer = answer[1:-1]
 
 
 
 
 
183
 
184
  print(f"Agent returning answer: {answer}")
185
  return answer
186
- print("No suitable AI message found for final answer. Returning last message content.")
187
- return str(final_graph_state["messages"][-1].content) if final_graph_state["messages"] else "Error: No messages in final state."
 
 
 
 
 
 
 
 
188
  else:
189
  print("Error: Agent did not reach a final state or no messages found.")
190
  return "Error: Agent did not produce a conclusive answer."
@@ -196,7 +230,7 @@ class LangGraphAgent:
196
  return f"Error during agent execution: {e}"
197
 
198
  # --- Main Evaluation Logic (Modified from starter) ---
199
- def run_and_submit_all(profile: gr.OAuthProfile | None):
200
  """
201
  Fetches all questions, runs the LangGraphAgent on them, submits all answers,
202
  and displays the results.
@@ -212,17 +246,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
212
 
213
  if not OPENROUTER_API_KEY:
214
  return "Error: OPENROUTER_API_KEY not found. Please set it in your .env file.", None
215
- if not TAVILY_API_KEY:
216
- print("Warning: TAVILY_API_KEY not found. Tavily search might not work as expected.")
217
- # return "Error: TAVILY_API_KEY not found. Please set it in your .env file.", None
218
 
219
  api_url = DEFAULT_API_URL
220
  questions_url = f"{api_url}/questions"
221
  submit_url = f"{api_url}/submit"
222
 
 
223
  try:
224
- # Default to Llama for now, can be made configurable later (e.g., via Gradio input)
225
- agent = LangGraphAgent(llm_choice="llama")
226
  except Exception as e:
227
  print(f"Error instantiating agent: {e}")
228
  return f"Error initializing agent: {e}", None
@@ -315,7 +347,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
315
  results_df = pd.DataFrame(results_log)
316
  return status_message, results_df
317
 
318
- # --- Gradio Interface (Mostly as provided) ---
319
  with gr.Blocks() as demo:
320
  gr.Markdown("# LangGraph GAIA Agent Evaluation Runner")
321
  gr.Markdown(
@@ -325,20 +356,28 @@ with gr.Blocks() as demo:
325
  2. **Create a `.env` file** in the root of your space with your API keys:
326
  ```
327
  OPENROUTER_API_KEY="your_openrouter_api_key"
328
- TAVILY_API_KEY="your_tavily_api_key" # Optional, but recommended for TavilySearch tool
329
  ```
330
  3. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
331
- 4. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
332
  ---
333
  **Disclaimers:**
334
  - Ensure your Hugging Face Space is public for the `agent_code` link to be verifiable.
335
  - Submitting all answers can take some time as the agent processes each question.
336
- - This agent uses LangGraph with TavilySearch, Wikipedia, Arxiv, and Qwen via OpenRouter.
337
  """
338
  )
339
 
340
  gr.LoginButton()
341
 
 
 
 
 
 
 
 
342
  run_button = gr.Button("Run Evaluation & Submit All Answers")
343
 
344
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
@@ -346,6 +385,7 @@ with gr.Blocks() as demo:
346
 
347
  run_button.click(
348
  fn=run_and_submit_all,
 
349
  outputs=[status_output, results_table]
350
  )
351
 
 
54
  if not OPENROUTER_API_KEY:
55
  raise ValueError("OPENROUTER_API_KEY is not set. Cannot initialize LLM.")
56
 
57
+ self.llm_choice = llm_choice
58
+ self.supports_tool_calling = False # Default to false
59
+
60
  if llm_choice == "llama":
61
  self.llm = ChatOpenAI(
62
+ model="meta-llama/llama-3.1-8b-instruct:free", # Corrected to Llama 3.1 as per user's earlier request
63
  api_key=OPENROUTER_API_KEY,
64
  base_url="https://openrouter.ai/api/v1",
65
+ temperature=0.1,
 
66
  )
67
+ # Llama 3.1 8B on OpenRouter might not support tool calling via the OpenAI SDK binding method
68
+ self.supports_tool_calling = False
69
+ print("Initialized Llama 3.1 8B Instruct (tool calling assumed NOT supported).")
70
  elif llm_choice == "qwen":
71
  self.llm = ChatOpenAI(
72
+ model="qwen/qwen-2-7b-instruct:free", # Using a Qwen-2 model as qwq-32b might be older
73
  api_key=OPENROUTER_API_KEY,
74
  base_url="https://openrouter.ai/api/v1",
75
+ temperature=0.1
76
  )
77
+ # Qwen models on OpenRouter might not support tool calling via the OpenAI SDK binding method
78
+ self.supports_tool_calling = False
79
+ print("Initialized Qwen-2 7B Instruct (tool calling assumed NOT supported).")
80
  else:
81
+ raise ValueError(f"Unsupported LLM choice: {llm_choice}. Choose 'llama', or 'qwen'.")
82
 
83
  self.tools_map = {tool.name: tool for tool in tools}
84
  self.graph = self._build_graph()
 
105
 
106
  def _should_call_tools(self, state: AgentState) -> str:
107
  print("LLM deciding next step...")
108
+ if not self.supports_tool_calling:
109
+ print("Tool calling not supported by the current LLM. Ending interaction.")
110
+ return "end"
111
+
112
  last_message = state["messages"][-1]
113
  if hasattr(last_message, "tool_calls") and last_message.tool_calls:
114
  print(f"LLM decided to call tools: {last_message.tool_calls}")
 
117
  return "end"
118
 
119
  def _call_llm(self, state: AgentState) -> Dict[str, Any]:
120
+ print(f"Calling LLM ({self.llm_choice})...")
121
+ if self.supports_tool_calling:
122
+ print("Binding tools to LLM for function calling.")
123
+ llm_with_tools = self.llm.bind_tools(tools)
124
+ response = llm_with_tools.invoke(state["messages"])
125
+ else:
126
+ print("Invoking LLM without binding tools.")
127
+ response = self.llm.invoke(state["messages"])
128
+
129
  print(f"LLM response: {response.content[:100]}...")
130
  return {"messages": [response]}
131
 
 
180
 
181
  if final_graph_state and final_graph_state["messages"]:
182
  for msg in reversed(final_graph_state["messages"]):
183
+ if isinstance(msg, AIMessage) and not msg.tool_calls and msg.content: # Ensure content exists
184
  answer = msg.content.strip()
185
+ if not answer: # Skip empty answers after initial stripping
186
+ continue
187
+
188
  # Remove common prefixes that LLMs might add despite instructions
189
  prefixes_to_remove = [
190
  "FINAL ANSWER:", "The answer is", "Here is the answer:",
191
+ "The final answer is", "Answer:", "Solution:",
192
+ "The direct answer is", "Here's the concise answer:",
193
+ "Here you go:", "Certainly, the answer is"
194
  ]
195
  for prefix in prefixes_to_remove:
196
+ # Case-insensitive prefix removal
197
+ if answer.lower().startswith(prefix.lower()):
198
  answer = answer[len(prefix):].strip()
199
 
200
+ # More robust quote stripping
201
+ if answer.startswith(("\"", "'")) and answer.endswith(("\"", "'")):
202
+ temp_answer = answer[1:-1]
203
+ # Avoid stripping if it's a legitimately quoted string like "'quoted string'" as the answer itself
204
+ if not (temp_answer.startswith(("\"", "'")) and temp_answer.endswith(("\"", "'"))):
205
+ answer = temp_answer
206
+
207
+ if not answer: # Check again if answer became empty after stripping
208
+ continue
209
 
210
  print(f"Agent returning answer: {answer}")
211
  return answer
212
+
213
+ # Refined fallback logic
214
+ print("No suitable AI message with valid content found after processing. Attempting to return last raw AI message if available.")
215
+ last_ai_msg_content = next((m.content.strip() for m in reversed(final_graph_state["messages"]) if isinstance(m, AIMessage) and m.content and not m.tool_calls), None)
216
+ if last_ai_msg_content:
217
+ print(f"Agent returning last raw AI message as fallback: {last_ai_msg_content}")
218
+ return last_ai_msg_content
219
+
220
+ print("No suitable AI message found for final answer, even as fallback.")
221
+ return "Error: Agent could not extract a valid answer." # More specific error
222
  else:
223
  print("Error: Agent did not reach a final state or no messages found.")
224
  return "Error: Agent did not produce a conclusive answer."
 
230
  return f"Error during agent execution: {e}"
231
 
232
  # --- Main Evaluation Logic (Modified from starter) ---
233
+ def run_and_submit_all(profile: gr.OAuthProfile | None, llm_model_choice: str):
234
  """
235
  Fetches all questions, runs the LangGraphAgent on them, submits all answers,
236
  and displays the results.
 
246
 
247
  if not OPENROUTER_API_KEY:
248
  return "Error: OPENROUTER_API_KEY not found. Please set it in your .env file.", None
249
+ # TAVILY_API_KEY check is handled by the tool initialization itself with a warning.
 
 
250
 
251
  api_url = DEFAULT_API_URL
252
  questions_url = f"{api_url}/questions"
253
  submit_url = f"{api_url}/submit"
254
 
255
+ print(f"Attempting to initialize agent with LLM: {llm_model_choice}")
256
  try:
257
+ agent = LangGraphAgent(llm_choice=llm_model_choice)
 
258
  except Exception as e:
259
  print(f"Error instantiating agent: {e}")
260
  return f"Error initializing agent: {e}", None
 
347
  results_df = pd.DataFrame(results_log)
348
  return status_message, results_df
349
 
 
350
  with gr.Blocks() as demo:
351
  gr.Markdown("# LangGraph GAIA Agent Evaluation Runner")
352
  gr.Markdown(
 
356
  2. **Create a `.env` file** in the root of your space with your API keys:
357
  ```
358
  OPENROUTER_API_KEY="your_openrouter_api_key"
359
+ TAVILY_API_KEY="your_tavily_api_key" # Optional, but TavilySearch tool won't work without it
360
  ```
361
  3. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
362
+ 4. **Select the LLM model** you want the agent to use.
363
+ 5. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
364
  ---
365
  **Disclaimers:**
366
  - Ensure your Hugging Face Space is public for the `agent_code` link to be verifiable.
367
  - Submitting all answers can take some time as the agent processes each question.
368
+ - The agent will use the selected LLM. Note that only some models (e.g., llama) support tool/function calling. If a model without tool support is chosen for a task requiring tools, it may not perform optimally or might not use tools.
369
  """
370
  )
371
 
372
  gr.LoginButton()
373
 
374
+ llm_choice_dropdown = gr.Dropdown(
375
+ choices=["llama", "qwen"],
376
+ value="llama", # Default to llama as it supports tool calling
377
+ label="Select LLM Model",
378
+ info="Choose the Large Language Model for the agent."
379
+ )
380
+
381
  run_button = gr.Button("Run Evaluation & Submit All Answers")
382
 
383
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
385
 
386
  run_button.click(
387
  fn=run_and_submit_all,
388
+ inputs=[llm_choice_dropdown], # Add llm_choice_dropdown as an input
389
  outputs=[status_output, results_table]
390
  )
391