ksj47 committed on
Commit
fbede70
·
verified ·
1 Parent(s): 6d731a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -126
app.py CHANGED
@@ -18,33 +18,21 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
  class BasicAgent:
19
  def __init__(self, hf_api_token: str | None = None):
20
  print("BasicAgent initializing...")
21
- # Determine the Hugging Face API token
22
- # Priority: 1. hf_api_token argument (if passed),
23
- # 2. HUGGINGFACEHUB_API_TOKEN env var,
24
- # 3. HF_TOKEN env var (common for HF Spaces)
25
- token_to_use = hf_api_token
26
- if not token_to_use:
27
- token_to_use = os.getenv("HUGGINGFACEHUB_API_TOKEN")
28
- if not token_to_use:
29
- token_to_use = os.getenv("HF_TOKEN")
30
 
31
  if not token_to_use:
32
- # This error will be caught by the agent instantiation try-except block
33
- # in run_and_submit_all, and a message will be shown in the UI.
34
  raise ValueError(
35
  "Hugging Face API token not found. Please set HUGGINGFACEHUB_API_TOKEN or HF_TOKEN "
36
  "as a secret in your Hugging Face Space. This token is required for the LLM."
37
  )
38
 
39
- # You can change the repo_id to any model on the Hugging Face Hub.
40
- # Ensure the chosen model is suitable for instruction following / question answering.
41
- # Examples: "mistralai/Mistral-7B-Instruct-v0.1", "google/flan-t5-large", "HuggingFaceH4/zephyr-7b-beta"
42
- # Using a smaller, faster model for demonstration:
43
- self.llm_repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
44
  try:
45
  self.llm = HuggingFaceHub(
46
  repo_id=self.llm_repo_id,
47
- model_kwargs={"temperature": 0.1, "max_new_tokens": 150}, # Adjust max_new_tokens as needed
 
 
48
  huggingfacehub_api_token=token_to_use
49
  )
50
  print(f"BasicAgent initialized with LLM: {self.llm_repo_id}")
@@ -52,106 +40,77 @@ class BasicAgent:
52
  print(f"Error initializing HuggingFaceHub: {e}")
53
  raise ValueError(f"Failed to initialize LLM: {e}. Check token and model repo_id.")
54
 
55
-
56
- def __call__(self, question: str) -> str:
57
- print(f"Agent received question (first 80 chars): {question[:80]}...")
58
 
59
  # Prompt engineering is crucial.
60
- # Instruct the LLM to provide a concise answer without any extra phrases.
61
- # Per GAIA instructions: "make sure you don’t include the text “FINAL ANSWER”
62
- # in your submission, just make your agent reply with the answer and nothing else"
63
- prompt = f"""You are a diligent and highly intelligent AI assistant. Your goal is to answer the given `Question` accurately and concisely by following the ReAct (Reasoning and Acting) framework.
64
- You must break down the problem into a sequence of thoughts and actions.
65
-
66
- **Available Tools:**
67
-
68
- 1. **`GAIAFileLookup(filename: str) -> str`**:
69
- * Use this tool to retrieve the content of a specific file relevant to the current question.
70
- * The `task_id` associated with the question will be handled by the system; you only need to provide the `filename`.
71
- * The question might explicitly name the file or give strong hints.
72
- * Returns the text content of the file or an error message if the file cannot be found/read.
73
-
74
- 2. **`Calculator(expression: str) -> str`**:
75
- * Use this tool to perform mathematical calculations.
76
- * Input a valid mathematical expression (e.g., "150 * 2 + 57", "(1024 - 256) / 8").
77
- * Returns the numerical result as a string, or an error message for invalid expressions.
78
-
79
- 3. **`LLM_Query(sub_question: str) -> str`**:
80
- * Use this tool for general knowledge lookups, complex reasoning that doesn't fit other tools, or to rephrase/summarize information you've gathered.
81
- * Input a clear question or instruction.
82
- * Returns the response from a powerful language model.
83
-
84
- **Output Format & Process:**
85
-
86
- You must strictly follow this format for each step of your reasoning:
87
-
88
- `Question:` The user's question you need to answer.
89
-
90
- `Thought:` Your reasoning about the question, your plan to answer it, and self-correction if needed. Explain what you need to find out or calculate.
91
- `Action:` The tool you choose to use from the list above (e.g., `GAIAFileLookup`, `Calculator`, `LLM_Query`). If you believe you can answer directly without a tool, you can skip to `Final Answer:` after your `Thought:`.
92
- `Action Input:` The input string for the chosen `Action`. For `GAIAFileLookup`, this is the filename. For `Calculator`, the mathematical expression. For `LLM_Query`, the sub-question.
93
- `Observation:` The result returned by the tool after your `Action` and `Action Input`. (This will be provided to you by the system).
94
-
95
- ... (You can have multiple Thought/Action/Action Input/Observation cycles) ...
96
 
97
- `Thought:` Once you have gathered all necessary information and are confident in your answer, summarize your findings.
98
- `Final Answer:` The concise answer to the original `Question`. **IMPORTANT: Provide ONLY the answer value itself. Do NOT include the prefix "Final Answer:" or any other explanatory text in the string that represents the actual answer to be submitted. The system will extract the text following this label.**
99
 
100
- **Key Guidelines for GAIA:**
 
 
101
 
102
- 1. **Conciseness:** The final answer must be precise and directly address the question. Avoid any extra text or explanation in the final answer value.
103
- 2. **Exact Match:** The scoring system uses exact match, so precision is critical.
104
- 3. **No "FINAL ANSWER" Prefix in Submission:** Remember, the text *after* your `Final Answer:` label is what gets submitted. Do not include the phrase "FINAL ANSWER" or "The answer is" *within that value*.
105
- 4. **File Identification:** Pay close attention to filenames mentioned or implied in the question.
106
- 5. **Multi-Step Reasoning:** Break down complex questions into smaller, manageable steps using the Thought/Action/Observation cycle.
107
 
108
- **Example Scenario (Illustrative):**
109
-
110
- `Question:` According to `report_Q3.txt`, what was the percentage increase in sales from $1500 in Q2 to the Q3 sales figure, rounded to one decimal place? The Q3 sales figure is mentioned as "Total Revenue".
111
-
112
- `Thought:` I need to find the "Total Revenue" in `report_Q3.txt`. Then I need to calculate the percentage increase from $1500 to that revenue. Finally, I need to round the result to one decimal place.
113
- `Action: GAIAFileLookup`
114
- `Action Input: report_Q3.txt`
115
- `Observation: [System provides content of report_Q3.txt, e.g., "...Total Revenue: $1800..."]`
116
-
117
- `Thought:` The report states Total Revenue (Q3 sales) is $1800. Q2 sales were $1500. Now I need to calculate the percentage increase: ((New - Old) / Old) * 100.
118
- `Action: Calculator`
119
- `Action Input: ((1800 - 1500) / 1500) * 100`
120
- `Observation: 20.0`
121
-
122
- `Thought:` The percentage increase is 20.0%. The question asks for it rounded to one decimal place, which it already is.
123
- `Final Answer: 20.0%`
124
 
125
  ---
126
 
127
- Now, please answer the following question using the ReAct framework:
128
- `Question: {actual_question_text_will_be_inserted_here}"""
 
 
129
 
130
  try:
131
- response = self.llm.invoke(prompt)
132
- answer = response.strip()
 
 
 
 
133
 
134
- # Further cleaning if the model still adds prefixes (common with some models)
135
- # Convert to lower for case-insensitive prefix checking
136
- answer_lower = answer.lower()
137
- common_prefixes = ["answer:", "the answer is:", "concise answer:"]
138
- for prefix in common_prefixes:
139
- if answer_lower.startswith(prefix):
 
 
 
 
 
 
140
  answer = answer[len(prefix):].strip()
141
- break # Remove only the first matching prefix
 
 
 
 
 
 
 
142
 
143
- print(f"Agent LLM raw response (first 80 chars): {response[:80]}...")
144
- print(f"Agent final answer (first 80 chars): {answer[:80]}...")
145
 
146
- if not answer: # Handle cases where the answer becomes empty after stripping
147
  print("Warning: Agent produced an empty answer after cleaning.")
148
- # Return a placeholder that indicates an issue but is still a string
149
  return "Unable to generate a valid answer."
150
 
151
  return answer
152
  except Exception as e:
153
  print(f"Error during LLM call for question '{question[:50]}...': {e}")
154
- # Return an error message string, as the submission expects a string answer.
155
  return f"AGENT_ERROR: LLM call failed. ({type(e).__name__})"
156
 
157
  def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -174,11 +133,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
174
 
175
  # 1. Instantiate Agent
176
  try:
177
- # The BasicAgent will attempt to find the HF token from env variables.
178
  agent = BasicAgent()
179
  except Exception as e:
180
  print(f"Error instantiating agent: {e}")
181
- # Return the error message to be displayed in the Gradio UI
182
  return f"Error initializing agent: {str(e)}", None
183
 
184
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run_no_space_id"
@@ -187,7 +144,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
187
  # 2. Fetch Questions
188
  print(f"Fetching questions from: {questions_url}")
189
  try:
190
- response = requests.get(questions_url, timeout=20) # Increased timeout
191
  response.raise_for_status()
192
  questions_data = response.json()
193
  if not questions_data:
@@ -218,12 +175,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
218
 
219
  print(f"\nProcessing question {i+1}/{len(questions_data)}, Task ID: {task_id}")
220
  try:
221
- submitted_answer = agent(question_text)
 
222
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
223
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
224
  except Exception as e:
225
  print(f"Error running agent on task {task_id}: {e}")
226
- # Ensure a placeholder is added for submission to maintain structure
227
  error_answer = f"AGENT_RUNTIME_ERROR: {type(e).__name__}"
228
  answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
229
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -259,8 +216,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
259
  try:
260
  error_json = e.response.json()
261
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
262
- except requests.exceptions.JSONDecodeError: # Renamed from JSONDecodeError for clarity
263
- error_detail += f" Response: {e.response.text[:500]}" # Log part of the response
264
  status_message = f"Submission Failed: {error_detail}"
265
  print(status_message)
266
  results_df = pd.DataFrame(results_log)
@@ -275,7 +232,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
275
  print(status_message)
276
  results_df = pd.DataFrame(results_log)
277
  return status_message, results_df
278
- except Exception as e: # Catch any other unexpected errors during submission
279
  status_message = f"An unexpected error occurred during submission: {e}"
280
  print(status_message)
281
  results_df = pd.DataFrame(results_log)
@@ -296,41 +253,25 @@ with gr.Blocks() as demo:
296
  Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions using an LLM).
297
  This space provides a basic setup. For better GAIA scores, you might need to:
298
  - Choose a more powerful LLM.
299
- - Improve prompt engineering.
300
- - Implement tool usage for questions requiring file access or external actions (the API provides `/files/{task_id}`).
301
  """
302
  )
303
 
304
- # Session state to hold the Hugging Face profile (token and username)
305
- # This isn't strictly necessary for this version as token is read from env for LLM
306
- # but good practice if profile info is needed elsewhere.
307
  hf_profile_state = gr.State(None)
308
 
309
- # Wrap LoginButton with a function to capture the profile
310
  def login_handler(profile: gr.OAuthProfile | None):
311
  if profile:
312
  print(f"Profile captured: {profile.username}")
313
- # If you wanted to pass profile.token to agent:
314
- # BasicAgent(hf_api_token=profile.token) - but env var method is preferred for LLM token
315
  return profile
316
 
317
- # The gr.LoginButton() automatically provides the profile to functions that list it as an input
318
- # So, `run_and_submit_all` will receive it directly when triggered by `run_button`.
319
- # No explicit state management for profile passing to `run_and_submit_all` is needed here.
320
  gr.LoginButton()
321
-
322
-
323
  run_button = gr.Button("Run Evaluation & Submit All Answers")
324
-
325
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
326
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True) # Removed max_rows
327
 
328
- # The profile from gr.LoginButton() is implicitly passed as the first argument
329
- # to `run_and_submit_all` if its signature includes it.
330
  run_button.click(
331
  fn=run_and_submit_all,
332
- # No explicit inputs needed here if `gr.LoginButton` handles profile passing.
333
- # If explicit passing was needed from a state: inputs=[hf_profile_state],
334
  outputs=[status_output, results_table]
335
  )
336
 
@@ -352,13 +293,10 @@ if __name__ == "__main__":
352
  else:
353
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
354
 
355
- # Check for HF_TOKEN at startup as a hint for the user
356
  if not (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")):
357
  print("⚠️ WARNING: HUGGINGFACEHUB_API_TOKEN or HF_TOKEN environment variable not found.")
358
  print(" The LLM agent will likely fail to initialize. Please set this token in your Space secrets.")
359
 
360
-
361
  print("-"*(60 + len(" App Starting ")) + "\n")
362
-
363
  print("Launching Gradio Interface for Basic Agent Evaluation...")
364
  demo.launch(debug=True, share=False)
 
18
  class BasicAgent:
19
  def __init__(self, hf_api_token: str | None = None):
20
  print("BasicAgent initializing...")
21
+ token_to_use = hf_api_token or os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
 
 
 
 
 
 
 
 
22
 
23
  if not token_to_use:
 
 
24
  raise ValueError(
25
  "Hugging Face API token not found. Please set HUGGINGFACEHUB_API_TOKEN or HF_TOKEN "
26
  "as a secret in your Hugging Face Space. This token is required for the LLM."
27
  )
28
 
29
+ self.llm_repo_id = "mistralai/Mistral-7B-Instruct-v0.1" # Or your preferred model
 
 
 
 
30
  try:
31
  self.llm = HuggingFaceHub(
32
  repo_id=self.llm_repo_id,
33
+ # Increased max_new_tokens as the ReAct prompt is long and might generate a longer thought process
34
+ # Temperature 0.0 for more deterministic ReAct output, 0.1 is also fine.
35
+ model_kwargs={"temperature": 0.1, "max_new_tokens": 512},
36
  huggingfacehub_api_token=token_to_use
37
  )
38
  print(f"BasicAgent initialized with LLM: {self.llm_repo_id}")
 
40
  print(f"Error initializing HuggingFaceHub: {e}")
41
  raise ValueError(f"Failed to initialize LLM: {e}. Check token and model repo_id.")
42
 
43
+ # Modified signature to accept task_id (though not used in this simple version yet)
44
+ def __call__(self, question: str, task_id: str | None = None) -> str:
45
+ print(f"Agent received question (Task ID: {task_id}, first 80 chars): {question[:80]}...")
46
 
47
  # Prompt engineering is crucial.
48
+ # The `question` variable (method argument) is now correctly inserted here.
49
+ # This is a single-shot prompt. A true ReAct agent would have a loop.
50
+ current_prompt = f"""You are a diligent and highly intelligent AI assistant. Your goal is to answer the given `Question` accurately and concisely.
51
+ If the question requires multiple steps or information from tools, think step-by-step.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ **Available Tools (Conceptual - for your reasoning process, actual tool calls are not implemented in this version):**
 
54
 
55
+ 1. **`GAIAFileLookup(filename: str) -> str`**: Retrieves file content.
56
+ 2. **`Calculator(expression: str) -> str`**: Performs calculations.
57
+ 3. **`LLM_Query(sub_question: str) -> str`**: For general knowledge.
58
 
59
+ **Output Format Expectation:**
60
+ While you might reason using a "Thought:", "Action:", "Observation:" cycle internally, for this specific task, your final output should be ONLY the direct answer to the question.
61
+ Example: If asked "What is 2+2?", your output should be "4".
 
 
62
 
63
+ **Key Guidelines for GAIA Submission:**
64
+ 1. **Conciseness:** The final answer must be precise and directly address the question.
65
+ 2. **No "FINAL ANSWER" Prefix in Submission:** Do NOT include "FINAL ANSWER:" or "The answer is:" in your actual response. Just the answer value.
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  ---
68
 
69
+ Now, please answer the following question:
70
+ Question: {question}
71
+
72
+ Answer:""" # Modified to guide the LLM towards a direct answer for this simplified agent
73
 
74
  try:
75
+ print(f"Sending to LLM (first 200 chars of prompt): {current_prompt[:200]}...")
76
+ response_text = self.llm.invoke(current_prompt)
77
+ answer = response_text.strip()
78
+
79
+ # Further cleaning if the model still adds prefixes or explanations
80
+ # This is important because we are not doing a full ReAct loop to extract "Final Answer:"
81
 
82
+ # Try to find "Answer:" if the LLM adds it despite instructions
83
+ if "Answer:" in answer:
84
+ # Take text after the last occurrence of "Answer:"
85
+ answer = answer.split("Answer:")[-1].strip()
86
+
87
+ # Remove common conversational prefixes that might slip through
88
+ common_prefixes_to_remove = [
89
+ "The answer is", "My answer is", "Based on the information", "The final answer is",
90
+ "Here is the answer", "I found that", "It seems that"
91
+ ] # Case-insensitive removal
92
+ for prefix in common_prefixes_to_remove:
93
+ if answer.lower().startswith(prefix.lower()):
94
  answer = answer[len(prefix):].strip()
95
+ # If the first character is now a colon or period, remove it
96
+ if answer.startswith(":") or answer.startswith("."):
97
+ answer = answer[1:].strip()
98
+ break # Only remove one such prefix
99
+
100
+ # If the LLM generated a ReAct-style "Final Answer:", extract from it.
101
+ if "Final Answer:" in answer:
102
+ answer = answer.split("Final Answer:")[-1].strip()
103
 
104
+ print(f"Agent LLM raw response (first 80 chars): {response_text[:80]}...")
105
+ print(f"Agent cleaned answer (first 80 chars): {answer[:80]}...")
106
 
107
+ if not answer:
108
  print("Warning: Agent produced an empty answer after cleaning.")
 
109
  return "Unable to generate a valid answer."
110
 
111
  return answer
112
  except Exception as e:
113
  print(f"Error during LLM call for question '{question[:50]}...': {e}")
 
114
  return f"AGENT_ERROR: LLM call failed. ({type(e).__name__})"
115
 
116
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
133
 
134
  # 1. Instantiate Agent
135
  try:
 
136
  agent = BasicAgent()
137
  except Exception as e:
138
  print(f"Error instantiating agent: {e}")
 
139
  return f"Error initializing agent: {str(e)}", None
140
 
141
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run_no_space_id"
 
144
  # 2. Fetch Questions
145
  print(f"Fetching questions from: {questions_url}")
146
  try:
147
+ response = requests.get(questions_url, timeout=20)
148
  response.raise_for_status()
149
  questions_data = response.json()
150
  if not questions_data:
 
175
 
176
  print(f"\nProcessing question {i+1}/{len(questions_data)}, Task ID: {task_id}")
177
  try:
178
+ # Pass task_id to the agent call
179
+ submitted_answer = agent(question_text, task_id=task_id)
180
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
181
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
182
  except Exception as e:
183
  print(f"Error running agent on task {task_id}: {e}")
 
184
  error_answer = f"AGENT_RUNTIME_ERROR: {type(e).__name__}"
185
  answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
186
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
216
  try:
217
  error_json = e.response.json()
218
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
219
+ except requests.exceptions.JSONDecodeError:
220
+ error_detail += f" Response: {e.response.text[:500]}"
221
  status_message = f"Submission Failed: {error_detail}"
222
  print(status_message)
223
  results_df = pd.DataFrame(results_log)
 
232
  print(status_message)
233
  results_df = pd.DataFrame(results_log)
234
  return status_message, results_df
235
+ except Exception as e:
236
  status_message = f"An unexpected error occurred during submission: {e}"
237
  print(status_message)
238
  results_df = pd.DataFrame(results_log)
 
253
  Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions using an LLM).
254
  This space provides a basic setup. For better GAIA scores, you might need to:
255
  - Choose a more powerful LLM.
256
+ - Implement a proper ReAct loop with tool parsing and execution.
257
+ - Implement actual tool usage (e.g., `/files/{task_id}`, calculator).
258
  """
259
  )
260
 
 
 
 
261
  hf_profile_state = gr.State(None)
262
 
 
263
  def login_handler(profile: gr.OAuthProfile | None):
264
  if profile:
265
  print(f"Profile captured: {profile.username}")
 
 
266
  return profile
267
 
 
 
 
268
  gr.LoginButton()
 
 
269
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
270
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
271
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
272
 
 
 
273
  run_button.click(
274
  fn=run_and_submit_all,
 
 
275
  outputs=[status_output, results_table]
276
  )
277
 
 
293
  else:
294
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
295
 
 
296
  if not (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")):
297
  print("⚠️ WARNING: HUGGINGFACEHUB_API_TOKEN or HF_TOKEN environment variable not found.")
298
  print(" The LLM agent will likely fail to initialize. Please set this token in your Space secrets.")
299
 
 
300
  print("-"*(60 + len(" App Starting ")) + "\n")
 
301
  print("Launching Gradio Interface for Basic Agent Evaluation...")
302
  demo.launch(debug=True, share=False)