mujtabarizvi committed on
Commit
93dbedf
·
verified ·
1 Parent(s): b83c856

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -70
app.py CHANGED
@@ -6,19 +6,17 @@ import pandas as pd
6
  import re # For parsing LLM output
7
 
8
  # --- HF Inference API for LLM ---
9
- from huggingface_hub import InferenceClient
 
10
  # You can choose a different model, but make sure it's good at instruction following and ReAct-style prompting.
11
- # Zephyr-7B-beta or Mistral-7B-Instruct are good choices available on the free inference API.
12
- # Starling-LM-7B-beta is also excellent if available and performant enough.
13
  LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta" # or "mistralai/Mistral-7B-Instruct-v0.2"
14
- # Ensure you have a Hugging Face token set in your space's secrets if using certain models,
15
- # though many popular ones work without it for basic inference.
16
- # Name: HF_TOKEN, Value: your_hf_token_here (with read access is usually enough for inference)
17
  try:
18
  hf_token = os.getenv("HF_TOKEN")
 
19
  llm_client = InferenceClient(model=LLM_MODEL, token=hf_token)
20
  except Exception as e:
21
- print(f"Error initializing HfInference client: {e}")
22
  llm_client = None
23
 
24
  # --- Tools ---
@@ -31,7 +29,7 @@ def search_tool(query: str) -> str:
31
  Args:
32
  query (str): The search query.
33
  Returns:
34
- str: A string containing the search results.
35
  """
36
  print(f"Tool: search_tool, Query: {query}")
37
  try:
@@ -40,10 +38,12 @@ def search_tool(query: str) -> str:
40
  if results:
41
  return "\n".join([f"Title: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}" for r in results])
42
  else:
43
- return "No results found for your query."
 
44
  except Exception as e:
45
  print(f"Error in search_tool: {e}")
46
- return f"Error performing search: {str(e)}"
 
47
 
48
  # 2. Calculator Tool
49
  def calculator_tool(expression: str) -> str:
@@ -57,21 +57,7 @@ def calculator_tool(expression: str) -> str:
57
  """
58
  print(f"Tool: calculator_tool, Expression: {expression}")
59
  try:
60
- # Basic security: allow only numbers, operators, parentheses, and math functions.
61
- # This is not perfectly secure for a public-facing app with arbitrary eval,
62
- # but for this constrained GAIA context, it's a common approach.
63
- # A safer approach would be to use a dedicated math parsing library.
64
- allowed_chars = "0123456789+-*/(). "
65
- if not all(char in allowed_chars or char.isspace() for char in expression):
66
- # A more robust check would involve parsing the expression.
67
- # For now, we'll allow what seems reasonable for GAIA math.
68
- # Let's try to evaluate common math patterns more safely.
69
- # This simple check is insufficient for true security.
70
- pass # Relaxing this for now to allow GAIA questions like "sqrt(16)" etc.
71
-
72
  # A slightly safer eval using a limited global scope
73
- # For GAIA, often questions involve simple arithmetic or known constants like pi.
74
- # This eval is still risky; a dedicated math expression parser is better for production.
75
  result = eval(expression, {"__builtins__": {}}, {"sqrt": lambda x: x**0.5, "pi": 3.1415926535})
76
  return str(result)
77
  except Exception as e:
@@ -89,14 +75,12 @@ class ReActAgent:
89
  self.max_iterations = max_iterations
90
  self.stop_pattern = "Final Answer:"
91
 
92
- # Construct tool descriptions for the prompt
93
  self.tool_descriptions = "\n".join([
94
  f"- {name}: {inspect.getdoc(func)}"
95
  for name, func in tools.items()
96
  ])
97
  self.tool_names = ", ".join(tools.keys())
98
 
99
- # This is the core ReAct prompt template
100
  self.react_prompt_template = inspect.cleandoc(f"""
101
  You are a helpful and observant AI assistant. Your goal is to answer the following question accurately.
102
  You must use a step-by-step thinking process (Thought, Action, Observation).
@@ -119,20 +103,12 @@ class ReActAgent:
119
 
120
  def run_llm(self, prompt: str) -> str:
121
  try:
122
- # print(f"\n--- LLM Prompt ---\n{prompt}\n--- End LLM Prompt ---")
123
- # Parameters for the LLM call
124
- # `max_new_tokens` is important to give the LLM enough space to think and provide an answer.
125
- # `temperature` can be low for more deterministic ReAct steps.
126
- # `stop_sequences` can help control generation if the model supports it well.
127
  response = self.llm.text_generation(
128
  prompt,
129
- max_new_tokens=512, # Increased to allow for longer thought processes
130
- temperature=0.2, # Lower for more factual/less creative ReAct steps
131
- do_sample=True, # Required if temperature is not 1.0
132
- # stop_sequences=["Observation:", "\nThought:", self.stop_pattern] # Helps stop at logical points
133
- # Using stop_sequences can be tricky and model-dependent. Simpler to parse output.
134
  )
135
- # print(f"--- LLM Raw Response ---\n{response}\n--- End LLM Raw Response ---")
136
  return response.strip()
137
  except Exception as e:
138
  print(f"Error during LLM call: {e}")
@@ -148,21 +124,18 @@ class ReActAgent:
148
  print(f"\nIteration {i+1}")
149
  llm_output = self.run_llm(current_prompt)
150
 
151
- if not llm_output: # Handle cases where LLM returns empty or error
152
  print("LLM returned empty or error, stopping.")
153
  return "Agent Error: LLM failed to respond."
154
 
155
- scratchpad += llm_output + "\n" # Add LLM's entire unfiltered output to scratchpad
156
 
157
- # Check for Final Answer
158
  final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_output, re.DOTALL | re.IGNORECASE)
159
  if final_answer_match:
160
  answer = final_answer_match.group(1).strip()
161
  print(f"Found Final Answer: {answer}")
162
  return answer
163
 
164
- # Parse Action
165
- # Regex to capture: Action: tool_name[input]
166
  action_match = re.search(r"Action:\s*([a-zA-Z_0-9]+)\[(.*?)\]", llm_output, re.DOTALL)
167
  if action_match:
168
  tool_name = action_match.group(1).strip()
@@ -174,29 +147,21 @@ class ReActAgent:
174
  observation = self.tools[tool_name](tool_input)
175
  except Exception as e:
176
  observation = f"Error executing tool {tool_name}: {e}"
177
- print(f"Observation: {observation[:200]}...") # Print truncated observation
178
  scratchpad += f"Observation: {observation}\n"
179
  else:
180
  print(f"Unknown tool: {tool_name}")
181
  scratchpad += f"Observation: Error - Unknown tool '{tool_name}'. Available tools: {self.tool_names}\n"
182
  else:
183
- # If no action, it might be just a thought, or malformed. Add the thought to scratchpad.
184
- # Or it might be the LLM directly trying to answer without "Final Answer:"
185
- # We assume the LLM is trying to continue the thought process or has given up.
186
  print("No valid action found in LLM output for this iteration.")
187
- # If the LLM isn't producing actions, it might be stuck or directly answering.
188
- # We will let the loop continue, hoping it recovers or hits max_iterations/Final Answer.
189
- # If it's a malformed output that isn't a Final Answer, it will just be added to scratchpad.
190
 
191
  current_prompt = self.react_prompt_template.format(question=question, scratchpad=scratchpad)
192
 
193
- print("Max iterations reached. Returning current scratchpad or best guess.")
194
- # If max iterations reached without "Final Answer:", try to extract a plausible answer from the last thought
195
- # or just return a message. This is a fallback.
196
- last_thought_match = re.findall(r"Thought:\s*(.*)", scratchpad, re.IGNORECASE)
197
- if last_thought_match:
198
- return f"Max iterations reached. Last thought: {last_thought_match[-1].strip()}"
199
- return "Agent failed to find an answer within the iteration limit."
200
 
201
 
202
  # --- Constants (from template) ---
@@ -204,10 +169,6 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
204
 
205
  # --- Main Execution Logic (from template, modified to use ReActAgent) ---
206
  def run_and_submit_all(profile: gr.OAuthProfile | None):
207
- """
208
- Fetches all questions, runs the ReActAgent on them, submits all answers,
209
- and displays the results.
210
- """
211
  space_id = os.getenv("SPACE_ID")
212
  if profile:
213
  username = f"{profile.username}"
@@ -220,13 +181,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
220
  questions_url = f"{api_url}/questions"
221
  submit_url = f"{api_url}/submit"
222
 
223
- # 1. Instantiate Agent
224
  try:
225
  available_tools = {
226
  "search_tool": search_tool,
227
  "calculator_tool": calculator_tool,
228
  }
229
- if llm_client is None: # Check if llm_client was initialized
230
  return "LLM Client could not be initialized. Check logs and HF_TOKEN.", None
231
  agent = ReActAgent(llm_client=llm_client, tools=available_tools)
232
  except Exception as e:
@@ -236,10 +196,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
236
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code not available (SPACE_ID not set)"
237
  print(f"Agent code link: {agent_code}")
238
 
239
- # 2. Fetch Questions
240
  print(f"Fetching questions from: {questions_url}")
241
  try:
242
- response = requests.get(questions_url, timeout=20) # Increased timeout
243
  response.raise_for_status()
244
  questions_data = response.json()
245
  if not questions_data:
@@ -254,7 +213,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
254
  print(f"Response text: {response.text[:500]}")
255
  return f"Error decoding server response for questions: {e}", None
256
 
257
- # 3. Run your Agent
258
  results_log = []
259
  answers_payload = []
260
  print(f"Running agent on {len(questions_data)} questions...")
@@ -278,15 +236,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
278
  print("Agent did not produce any answers to submit.")
279
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
280
 
281
- # 4. Prepare Submission
282
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
283
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
284
  print(status_update)
285
 
286
- # 5. Submit
287
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
288
  try:
289
- response = requests.post(submit_url, json=submission_data, timeout=120) # Increased timeout for submission
290
  response.raise_for_status()
291
  result_data = response.json()
292
  final_status = (
@@ -347,7 +303,7 @@ with gr.Blocks() as demo:
347
  gr.LoginButton()
348
  run_button = gr.Button("Run Evaluation & Submit All Answers")
349
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
350
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True) # Removed max_rows
351
 
352
  run_button.click(
353
  fn=run_and_submit_all,
@@ -371,7 +327,7 @@ if __name__ == "__main__":
371
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
372
 
373
  if llm_client is None:
374
- print("⚠️ LLM Client (HfInference) was not initialized. The agent will not work.")
375
  print(" Please check if you need to set the HF_TOKEN secret in your Space settings,")
376
  print(f" and ensure the model '{LLM_MODEL}' is accessible via the Inference API.")
377
  else:
 
6
  import re # For parsing LLM output
7
 
8
  # --- HF Inference API for LLM ---
9
+ from huggingface_hub import InferenceClient # Corrected import
10
+
11
  # You can choose a different model, but make sure it's good at instruction following and ReAct-style prompting.
 
 
12
  LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta" # or "mistralai/Mistral-7B-Instruct-v0.2"
13
+
 
 
14
  try:
15
  hf_token = os.getenv("HF_TOKEN")
16
+ # Initialize with the corrected InferenceClient
17
  llm_client = InferenceClient(model=LLM_MODEL, token=hf_token)
18
  except Exception as e:
19
+ print(f"Error initializing InferenceClient: {e}")
20
  llm_client = None
21
 
22
  # --- Tools ---
 
29
  Args:
30
  query (str): The search query.
31
  Returns:
32
+ str: A string containing the search results, or an error/status message.
33
  """
34
  print(f"Tool: search_tool, Query: {query}")
35
  try:
 
38
  if results:
39
  return "\n".join([f"Title: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}" for r in results])
40
  else:
41
+ # Provide a more informative message if no results are found
42
+ return "No results found for your query. This might mean the query returned no relevant documents, or there could be a temporary issue (e.g., rate limit)."
43
  except Exception as e:
44
  print(f"Error in search_tool: {e}")
45
+ # Make the error message slightly more informative about potential causes
46
+ return f"Error performing search: {str(e)}. This could be due to a network issue, an invalid query, or a rate limit."
47
 
48
  # 2. Calculator Tool
49
  def calculator_tool(expression: str) -> str:
 
57
  """
58
  print(f"Tool: calculator_tool, Expression: {expression}")
59
  try:
 
 
 
 
 
 
 
 
 
 
 
 
60
  # A slightly safer eval using a limited global scope
 
 
61
  result = eval(expression, {"__builtins__": {}}, {"sqrt": lambda x: x**0.5, "pi": 3.1415926535})
62
  return str(result)
63
  except Exception as e:
 
75
  self.max_iterations = max_iterations
76
  self.stop_pattern = "Final Answer:"
77
 
 
78
  self.tool_descriptions = "\n".join([
79
  f"- {name}: {inspect.getdoc(func)}"
80
  for name, func in tools.items()
81
  ])
82
  self.tool_names = ", ".join(tools.keys())
83
 
 
84
  self.react_prompt_template = inspect.cleandoc(f"""
85
  You are a helpful and observant AI assistant. Your goal is to answer the following question accurately.
86
  You must use a step-by-step thinking process (Thought, Action, Observation).
 
103
 
104
  def run_llm(self, prompt: str) -> str:
105
  try:
 
 
 
 
 
106
  response = self.llm.text_generation(
107
  prompt,
108
+ max_new_tokens=512,
109
+ temperature=0.2,
110
+ do_sample=True,
 
 
111
  )
 
112
  return response.strip()
113
  except Exception as e:
114
  print(f"Error during LLM call: {e}")
 
124
  print(f"\nIteration {i+1}")
125
  llm_output = self.run_llm(current_prompt)
126
 
127
+ if not llm_output:
128
  print("LLM returned empty or error, stopping.")
129
  return "Agent Error: LLM failed to respond."
130
 
131
+ scratchpad += llm_output + "\n"
132
 
 
133
  final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_output, re.DOTALL | re.IGNORECASE)
134
  if final_answer_match:
135
  answer = final_answer_match.group(1).strip()
136
  print(f"Found Final Answer: {answer}")
137
  return answer
138
 
 
 
139
  action_match = re.search(r"Action:\s*([a-zA-Z_0-9]+)\[(.*?)\]", llm_output, re.DOTALL)
140
  if action_match:
141
  tool_name = action_match.group(1).strip()
 
147
  observation = self.tools[tool_name](tool_input)
148
  except Exception as e:
149
  observation = f"Error executing tool {tool_name}: {e}"
150
+ print(f"Observation: {observation[:200]}...")
151
  scratchpad += f"Observation: {observation}\n"
152
  else:
153
  print(f"Unknown tool: {tool_name}")
154
  scratchpad += f"Observation: Error - Unknown tool '{tool_name}'. Available tools: {self.tool_names}\n"
155
  else:
 
 
 
156
  print("No valid action found in LLM output for this iteration.")
 
 
 
157
 
158
  current_prompt = self.react_prompt_template.format(question=question, scratchpad=scratchpad)
159
 
160
+ # Fallback if max_iterations is reached without a "Final Answer:"
161
+ print(f"Max iterations reached for question (first 50 chars): {question[:50]}...")
162
+ standard_failure_message = "Agent could not determine an answer within the allowed steps."
163
+ print(f"Returning standard failure message: {standard_failure_message}")
164
+ return standard_failure_message
 
 
165
 
166
 
167
  # --- Constants (from template) ---
 
169
 
170
  # --- Main Execution Logic (from template, modified to use ReActAgent) ---
171
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
172
  space_id = os.getenv("SPACE_ID")
173
  if profile:
174
  username = f"{profile.username}"
 
181
  questions_url = f"{api_url}/questions"
182
  submit_url = f"{api_url}/submit"
183
 
 
184
  try:
185
  available_tools = {
186
  "search_tool": search_tool,
187
  "calculator_tool": calculator_tool,
188
  }
189
+ if llm_client is None:
190
  return "LLM Client could not be initialized. Check logs and HF_TOKEN.", None
191
  agent = ReActAgent(llm_client=llm_client, tools=available_tools)
192
  except Exception as e:
 
196
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code not available (SPACE_ID not set)"
197
  print(f"Agent code link: {agent_code}")
198
 
 
199
  print(f"Fetching questions from: {questions_url}")
200
  try:
201
+ response = requests.get(questions_url, timeout=20)
202
  response.raise_for_status()
203
  questions_data = response.json()
204
  if not questions_data:
 
213
  print(f"Response text: {response.text[:500]}")
214
  return f"Error decoding server response for questions: {e}", None
215
 
 
216
  results_log = []
217
  answers_payload = []
218
  print(f"Running agent on {len(questions_data)} questions...")
 
236
  print("Agent did not produce any answers to submit.")
237
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
238
 
 
239
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
240
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
241
  print(status_update)
242
 
 
243
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
244
  try:
245
+ response = requests.post(submit_url, json=submission_data, timeout=120)
246
  response.raise_for_status()
247
  result_data = response.json()
248
  final_status = (
 
303
  gr.LoginButton()
304
  run_button = gr.Button("Run Evaluation & Submit All Answers")
305
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
306
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
307
 
308
  run_button.click(
309
  fn=run_and_submit_all,
 
327
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
328
 
329
  if llm_client is None:
330
+ print("⚠️ LLM Client (InferenceClient) was not initialized. The agent will not work.")
331
  print(" Please check if you need to set the HF_TOKEN secret in your Space settings,")
332
  print(f" and ensure the model '{LLM_MODEL}' is accessible via the Inference API.")
333
  else: