ksj47 committed on
Commit
ac7dfbb
·
verified ·
1 Parent(s): c28f5cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -218
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from langchain_community.llms import HuggingFaceHub # Uncommented for HuggingFaceHub
6
  # from dotenv import load_dotenv # Uncomment for local testing with a .env file
7
 
8
  # For local testing, you might want to load environment variables from a .env file
@@ -11,148 +11,9 @@ from langchain_community.llms import HuggingFaceHub # Uncommented for HuggingFac
11
  # load_dotenv()
12
 
13
  # --- Constants ---
14
- # import google.generativeai as genai # For Gemini - Commented out
15
-
16
- # ... (rest of your existing imports and constants)
17
-
18
- # --- Constants ---
19
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # This remains the same
20
-
21
- # --- Basic Agent Definition -- (Gemini Agent Commented Out) ---
22
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
23
- # class BasicAgent:
24
- # def __init__(self, google_api_key: str | None = None): # Changed parameter name for clarity
25
- # print("BasicAgent initializing with Google Gemini...")
26
-
27
- # # Determine the Google API token
28
- # token_to_use = google_api_key
29
- # if not token_to_use:
30
- # token_to_use = os.getenv("GOOGLE_API_KEY") # Standard environment variable for Google API keys
31
-
32
- # if not token_to_use:
33
- # raise ValueError(
34
- # "Google API key not found. Please set GOOGLE_API_KEY "
35
- # "as a secret in your Hugging Face Space. This token is required for Gemini."
36
- # )
37
-
38
- # try:
39
- # # Configure the Gemini client
40
- # genai.configure(api_key=token_to_use)
41
-
42
- # self.model_name = "gemini-1.5-pro-latest" # Or "gemini-pro"
43
-
44
- # self.llm = genai.GenerativeModel(self.model_name)
45
-
46
- # self.generation_config = genai.types.GenerationConfig(
47
- # temperature=0.1,
48
- # )
49
- # self.safety_settings = [
50
- # {
51
- # "category": "HARM_CATEGORY_HARASSMENT",
52
- # "threshold": "BLOCK_MEDIUM_AND_ABOVE"
53
- # },
54
- # {
55
- # "category": "HARM_CATEGORY_HATE_SPEECH",
56
- # "threshold": "BLOCK_MEDIUM_AND_ABOVE"
57
- # },
58
- # {
59
- # "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
60
- # "threshold": "BLOCK_MEDIUM_AND_ABOVE"
61
- # },
62
- # {
63
- # "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
64
- # "threshold": "BLOCK_MEDIUM_AND_ABOVE"
65
- # }
66
- # ]
67
-
68
- # print(f"BasicAgent initialized with Google Gemini model: {self.model_name}")
69
- # except Exception as e:
70
- # print(f"Error initializing Google Gemini client: {e}")
71
- # raise ValueError(f"Failed to initialize Gemini: {e}. Check API key and model name.")
72
-
73
- # def __call__(self, question: str, task_id: str | None = None) -> str:
74
- # print(f"Agent (Gemini) received question (Task ID: {task_id}, first 80 chars): {question[:80]}...")
75
-
76
- # current_prompt = f"""You are a diligent and highly intelligent AI assistant. Your goal is to answer the given `Question` accurately and concisely.
77
- # If the question requires multiple steps or information from tools, think step-by-step.
78
- # **Available Tools (Conceptual - for your reasoning process, actual tool calls are not implemented in this version):**
79
- # 1. **`GAIAFileLookup(filename: str) -> str`**: Retrieves file content.
80
- # 2. **`Calculator(expression: str) -> str`**: Performs calculations.
81
- # 3. **`LLM_Query(sub_question: str) -> str`**: For general knowledge.
82
- # **Output Format Expectation:**
83
- # While you might reason using a "Thought:", "Action:", "Observation:" cycle internally, for this specific task, your final output should be ONLY the direct answer to the question.
84
- # Example: If asked "What is 2+2?", your output should be "4".
85
- # **Key Guidelines for GAIA Submission:**
86
- # 1. **Conciseness:** The final answer must be precise and directly address the question.
87
- # 2. **No "FINAL ANSWER" Prefix in Submission:** Do NOT include "FINAL ANSWER:" or "The answer is:" in your actual response. Just the answer value.
88
- # ---
89
- # Now, please answer the following question:
90
- # Question: {question}
91
- # Answer:"""
92
-
93
- # try:
94
- # print(f"Sending to Gemini (first 200 chars of prompt): {current_prompt[:200]}...")
95
-
96
- # response = self.llm.generate_content(
97
- # current_prompt,
98
- # generation_config=self.generation_config,
99
- # safety_settings=self.safety_settings
100
- # )
101
-
102
- # if response.candidates:
103
- # if response.candidates[0].content.parts:
104
- # response_text = response.candidates[0].content.parts[0].text
105
- # else:
106
- # response_text = ""
107
- # print("Warning: Gemini response has no content parts.")
108
- # if response.prompt_feedback and response.prompt_feedback.block_reason:
109
- # print(f"Prompt blocked by Gemini. Reason: {response.prompt_feedback.block_reason_message or response.prompt_feedback.block_reason}")
110
- # return f"AGENT_ERROR: Prompt blocked by Gemini ({response.prompt_feedback.block_reason})."
111
- # else:
112
- # response_text = ""
113
- # print("Warning: Gemini response has no candidates.")
114
- # if response.prompt_feedback and response.prompt_feedback.block_reason:
115
- # print(f"Prompt blocked by Gemini. Reason: {response.prompt_feedback.block_reason_message or response.prompt_feedback.block_reason}")
116
- # return f"AGENT_ERROR: Prompt blocked by Gemini ({response.prompt_feedback.block_reason})."
117
- # return "AGENT_ERROR: Gemini returned no candidates in response."
118
-
119
-
120
- # answer = response_text.strip()
121
-
122
- # if "Answer:" in answer:
123
- # answer = answer.split("Answer:")[-1].strip()
124
-
125
- # common_prefixes_to_remove = [
126
- # "The answer is", "My answer is", "Based on the information", "The final answer is",
127
- # "Here is the answer", "I found that", "It seems that"
128
- # ]
129
- # for prefix in common_prefixes_to_remove:
130
- # if answer.lower().startswith(prefix.lower()):
131
- # answer = answer[len(prefix):].strip()
132
- # if answer.startswith(":") or answer.startswith("."):
133
- # answer = answer[1:].strip()
134
- # break
135
- # if "Final Answer:" in answer:
136
- # answer = answer.split("Final Answer:")[-1].strip()
137
-
138
- # print(f"Agent (Gemini) LLM raw response (first 80 chars): {response_text[:80]}...")
139
- # print(f"Agent (Gemini) cleaned answer (first 80 chars): {answer[:80]}...")
140
-
141
- # if not answer:
142
- # print("Warning: Agent (Gemini) produced an empty answer after cleaning.")
143
- # return "Unable to generate a valid answer from Gemini."
144
-
145
- # return answer
146
- # except Exception as e:
147
- # if hasattr(e, 'message'):
148
- # error_message = e.message
149
- # else:
150
- # error_message = str(e)
151
- # print(f"Error during Gemini LLM call for question '{question[:50]}...': {error_message}")
152
- # return f"AGENT_ERROR: Gemini LLM call failed. ({type(e).__name__}: {error_message})"
153
 
154
  # --- Basic Agent Definition -- (HuggingFaceHub Agent Activated) ---
155
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
156
  class BasicAgent:
157
  def __init__(self, hf_api_token: str | None = None):
158
  print("BasicAgent initializing with HuggingFaceHub...")
@@ -164,30 +25,33 @@ class BasicAgent:
164
  "as a secret in your Hugging Face Space. This token is required for the LLM."
165
  )
166
 
167
- self.llm_repo_id = "mistralai/Mistral-7B-Instruct-v0.1" # Or your preferred model
168
- # self.llm_repo_id = "HuggingFaceH4/zephyr-7b-beta" # Another option
169
- # self.llm_repo_id = "google/gemma-7b-it" # Another option, ensure you have access/agreed to terms
 
170
 
171
  try:
172
  self.llm = HuggingFaceHub(
173
  repo_id=self.llm_repo_id,
174
- # Increased max_new_tokens as the ReAct prompt is long and might generate a longer thought process
175
- # Temperature 0.0 for more deterministic ReAct output, 0.1 is also fine.
176
- model_kwargs={"temperature": 0.1, "max_new_tokens": 512},
 
 
177
  huggingfacehub_api_token=token_to_use
178
  )
179
  print(f"BasicAgent initialized with LLM: {self.llm_repo_id}")
180
  except Exception as e:
181
  print(f"Error initializing HuggingFaceHub: {e}")
182
- raise ValueError(f"Failed to initialize LLM: {e}. Check token and model repo_id.")
 
 
 
 
183
 
184
- # Modified signature to accept task_id (though not used in this simple version yet)
185
  def __call__(self, question: str, task_id: str | None = None) -> str:
186
  print(f"Agent (HF) received question (Task ID: {task_id}, first 80 chars): {question[:80]}...")
187
 
188
- # Prompt engineering is crucial.
189
- # The `question` variable (method argument) is now correctly inserted here.
190
- # This is a single-shot prompt. A true ReAct agent would have a loop.
191
  current_prompt = f"""You are a diligent and highly intelligent AI assistant. Your goal is to answer the given `Question` accurately and concisely.
192
  If the question requires multiple steps or information from tools, think step-by-step.
193
 
@@ -210,52 +74,54 @@ Example: If asked "What is 2+2?", your output should be "4".
210
  Now, please answer the following question:
211
  Question: {question}
212
 
213
- Answer:""" # Modified to guide the LLM towards a direct answer for this simplified agent
214
 
215
  try:
216
  print(f"Sending to LLM (HF Hub) (first 200 chars of prompt): {current_prompt[:200]}...")
217
- # Langchain's HuggingFaceHub.invoke expects a string and returns a string
218
- response_text = self.llm.invoke(current_prompt)
219
  answer = response_text.strip()
220
 
221
- # Further cleaning if the model still adds prefixes or explanations
222
- # This is important because we are not doing a full ReAct loop to extract "Final Answer:"
223
-
224
- # Try to find "Answer:" if the LLM adds it despite instructions
225
  if "Answer:" in answer:
226
- # Take text after the last occurrence of "Answer:"
227
  answer = answer.split("Answer:")[-1].strip()
228
 
229
- # Remove common conversational prefixes that might slip through
230
  common_prefixes_to_remove = [
231
  "The answer is", "My answer is", "Based on the information", "The final answer is",
232
  "Here is the answer", "I found that", "It seems that"
233
- ] # Case-insensitive removal
234
  for prefix in common_prefixes_to_remove:
235
  if answer.lower().startswith(prefix.lower()):
236
  answer = answer[len(prefix):].strip()
237
- # If the first character is now a colon or period, remove it
238
  if answer.startswith(":") or answer.startswith("."):
239
  answer = answer[1:].strip()
240
- break # Only remove one such prefix
241
-
242
- # If the LLM generated a ReAct-style "Final Answer:", extract from it.
243
- if "Final Answer:" in answer: # Check if "Final Answer:" exists in the string
244
- answer = answer.split("Final Answer:")[-1].strip() # Get content after "Final Answer:"
245
 
246
  print(f"Agent (HF) LLM raw response (first 80 chars): {response_text[:80]}...")
247
  print(f"Agent (HF) cleaned answer (first 80 chars): {answer[:80]}...")
248
 
249
  if not answer:
250
  print("Warning: Agent (HF) produced an empty answer after cleaning.")
251
- return "AGENT_ERROR: LLM produced an empty answer." # More specific error
252
 
253
  return answer
254
  except Exception as e:
255
  print(f"Error during LLM call for question '{question[:50]}...': {e}")
 
 
 
 
 
256
  return f"AGENT_ERROR: LLM call failed. ({type(e).__name__}: {str(e)})"
257
 
258
 
 
 
 
259
  def run_and_submit_all(profile: gr.OAuthProfile | None):
260
  """
261
  Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -267,8 +133,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
267
  username = f"{profile.username}"
268
  print(f"User logged in: {username}")
269
  else:
 
 
 
270
  print("User not logged in.")
271
- return "Please Login to Hugging Face with the button.", None
 
272
 
273
  api_url = DEFAULT_API_URL
274
  questions_url = f"{api_url}/questions"
@@ -276,10 +146,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
276
 
277
  # 1. Instantiate Agent
278
  try:
279
- # This will now instantiate the HuggingFaceHub BasicAgent
280
- agent = BasicAgent()
 
281
  except Exception as e:
282
  print(f"Error instantiating agent: {e}")
 
283
  return f"Error initializing agent: {str(e)}", None
284
 
285
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run_no_space_id"
@@ -300,12 +172,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
300
  return f"Error fetching questions: {e}", None
301
  except requests.exceptions.JSONDecodeError as e:
302
  print(f"Error decoding JSON response from questions endpoint: {e}")
303
- print(f"Response text: {response.text[:500]}")
304
  return f"Error decoding server response for questions: {e}", None
305
- except Exception as e:
306
  print(f"An unexpected error occurred fetching questions: {e}")
307
  return f"An unexpected error occurred fetching questions: {e}", None
308
 
 
309
  # 3. Run your Agent
310
  results_log = []
311
  answers_payload = []
@@ -313,26 +186,26 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
313
  for i, item in enumerate(questions_data):
314
  task_id = item.get("task_id")
315
  question_text = item.get("question")
316
- if not task_id or question_text is None:
 
317
  print(f"Skipping item with missing task_id or question: {item}")
318
  continue
319
 
320
  print(f"\nProcessing question {i+1}/{len(questions_data)}, Task ID: {task_id}")
321
  try:
322
- # Pass task_id to the agent call
323
  submitted_answer = agent(question_text, task_id=task_id)
324
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
325
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
326
- except Exception as e:
327
  print(f"Error running agent on task {task_id}: {e}")
328
  error_answer = f"AGENT_RUNTIME_ERROR: {type(e).__name__}"
329
  answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
330
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
331
 
332
 
333
- if not answers_payload:
334
  print("Agent did not produce any answers to submit.")
335
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
336
 
337
  # 4. Prepare Submission
338
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
@@ -342,7 +215,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
342
  # 5. Submit
343
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
344
  try:
345
- response = requests.post(submit_url, json=submission_data, timeout=60)
 
346
  response.raise_for_status()
347
  result_data = response.json()
348
  final_status = (
@@ -358,10 +232,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
358
  except requests.exceptions.HTTPError as e:
359
  error_detail = f"Server responded with status {e.response.status_code}."
360
  try:
361
- error_json = e.response.json()
362
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
363
- except requests.exceptions.JSONDecodeError:
364
- error_detail += f" Response: {e.response.text[:500]}"
365
  status_message = f"Submission Failed: {error_detail}"
366
  print(status_message)
367
  results_df = pd.DataFrame(results_log)
@@ -371,18 +245,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
371
  print(status_message)
372
  results_df = pd.DataFrame(results_log)
373
  return status_message, results_df
374
- except requests.exceptions.RequestException as e:
375
  status_message = f"Submission Failed: Network error - {e}"
376
  print(status_message)
377
  results_df = pd.DataFrame(results_log)
378
  return status_message, results_df
379
- except Exception as e:
380
  status_message = f"An unexpected error occurred during submission: {e}"
381
  print(status_message)
382
  results_df = pd.DataFrame(results_log)
383
  return status_message, results_df
384
 
385
-
386
  # --- Build Gradio Interface using Blocks ---
387
  with gr.Blocks() as demo:
388
  gr.Markdown("# Basic Agent Evaluation Runner")
@@ -390,47 +263,28 @@ with gr.Blocks() as demo:
390
  """
391
  **Instructions:**
392
  1. This Space uses a `BasicAgent` with an LLM from HuggingFace Hub. Ensure you have set your `HUGGINGFACEHUB_API_TOKEN` or `HF_TOKEN` in the Space secrets for the LLM to work.
393
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
394
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
395
  ---
396
  **Disclaimers:**
397
  Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions using an LLM).
398
  This space provides a basic setup. For better GAIA scores, you might need to:
399
  - Choose a more powerful LLM (e.g., from the `llm_repo_id` options in `BasicAgent` or others).
400
  - Implement a proper ReAct loop with tool parsing and execution.
401
- - Implement actual tool usage (e.g., `/files/{task_id}`, calculator).
402
  """
403
  )
404
-
405
- hf_profile_state = gr.State(None)
406
 
407
- # This handler is not strictly necessary for the profile data itself if just using gr.LoginButton()
408
- # but can be useful if you need to react to login events beyond what the button click does.
409
- # For this app, `profile` argument to `run_and_submit_all` is handled directly by Gradio if login is used.
410
- # def login_handler(profile: gr.OAuthProfile | None):
411
- # if profile:
412
- # print(f"Profile captured: {profile.username}")
413
- # return profile
414
-
415
- # The LoginButton itself enables OAuth.
416
- # When `run_and_submit_all` is called, if the user is logged in,
417
- # Gradio automatically passes the gr.OAuthProfile object as the first argument
418
- # if the function signature expects it (like `profile: gr.OAuthProfile | None`).
419
  login_button = gr.LoginButton()
420
 
421
  run_button = gr.Button("Run Evaluation & Submit All Answers")
422
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
423
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
424
 
425
- # The `login_button` itself doesn't need to be an input to `run_and_submit_all`
426
- # if `run_and_submit_all` is typed with `gr.OAuthProfile | None` as its first argument.
427
- # Gradio handles passing the profile automatically on click if the user is logged in.
428
- # If the user is not logged in, `profile` will be `None`.
429
  run_button.click(
430
  fn=run_and_submit_all,
431
- # No explicit inputs needed here if the first arg of fn is type-hinted with gr.OAuthProfile
432
- # and you are using gr.LoginButton(). Gradio handles this.
433
- # inputs=[hf_profile_state], # Not needed if using gr.OAuthProfile type hint
434
  outputs=[status_output, results_table]
435
  )
436
 
@@ -452,19 +306,25 @@ if __name__ == "__main__":
452
  else:
453
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
454
 
455
- # Updated token check for HuggingFace Hub
456
  if not (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")):
457
  print("⚠️ WARNING: HUGGINGFACEHUB_API_TOKEN or HF_TOKEN environment variable not found.")
458
  print(" The LLM agent will likely fail to initialize. Please set this token in your Space secrets.")
459
- # else: # Optional: confirm if token is found
460
- # print("✅ HUGGINGFACEHUB_API_TOKEN or HF_TOKEN found (or assumed to be set).")
461
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
- # Commented out the GOOGLE_API_KEY check as it's no longer relevant for this agent
464
- # if not os.getenv("GOOGLE_API_KEY"):
465
- # print("⚠️ WARNING: GOOGLE_API_KEY environment variable not found.")
466
- # print(" The Gemini agent will likely fail to initialize. Please set this token in your Space secrets.")
467
 
468
  print("-"*(60 + len(" App Starting ")) + "\n")
469
  print("Launching Gradio Interface for Basic Agent Evaluation...")
470
- demo.launch(debug=True, share=False)
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ from langchain_community.llms import HuggingFaceHub
6
  # from dotenv import load_dotenv # Uncomment for local testing with a .env file
7
 
8
  # For local testing, you might want to load environment variables from a .env file
 
11
  # load_dotenv()
12
 
13
  # --- Constants ---
14
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # --- Basic Agent Definition -- (HuggingFaceHub Agent Activated) ---
 
17
  class BasicAgent:
18
  def __init__(self, hf_api_token: str | None = None):
19
  print("BasicAgent initializing with HuggingFaceHub...")
 
25
  "as a secret in your Hugging Face Space. This token is required for the LLM."
26
  )
27
 
28
+ self.llm_repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
29
+ # Other options:
30
+ # self.llm_repo_id = "HuggingFaceH4/zephyr-7b-beta"
31
+ # self.llm_repo_id = "google/gemma-7b-it" # Ensure you have access/agreed to terms
32
 
33
  try:
34
  self.llm = HuggingFaceHub(
35
  repo_id=self.llm_repo_id,
36
+ task="text-generation", # Explicitly set the task for instruct models
37
+ model_kwargs={
38
+ "temperature": 0.1,
39
+ "max_new_tokens": 1024 # Increased slightly for potentially longer reasoning or verbosity
40
+ },
41
  huggingfacehub_api_token=token_to_use
42
  )
43
  print(f"BasicAgent initialized with LLM: {self.llm_repo_id}")
44
  except Exception as e:
45
  print(f"Error initializing HuggingFaceHub: {e}")
46
+ # Added more detail to the error message
47
+ raise ValueError(
48
+ f"Failed to initialize LLM ({self.llm_repo_id}): {e}. "
49
+ "Check token, model repo_id, and ensure 'huggingface_hub>=0.20.2' is in requirements.txt."
50
+ )
51
 
 
52
  def __call__(self, question: str, task_id: str | None = None) -> str:
53
  print(f"Agent (HF) received question (Task ID: {task_id}, first 80 chars): {question[:80]}...")
54
 
 
 
 
55
  current_prompt = f"""You are a diligent and highly intelligent AI assistant. Your goal is to answer the given `Question` accurately and concisely.
56
  If the question requires multiple steps or information from tools, think step-by-step.
57
 
 
74
  Now, please answer the following question:
75
  Question: {question}
76
 
77
+ Answer:"""
78
 
79
  try:
80
  print(f"Sending to LLM (HF Hub) (first 200 chars of prompt): {current_prompt[:200]}...")
81
+ response_text = self.llm.invoke(current_prompt)
 
82
  answer = response_text.strip()
83
 
84
+ # Clean the answer
85
+ # If the model includes the "Answer:" prompt in its response
 
 
86
  if "Answer:" in answer:
 
87
  answer = answer.split("Answer:")[-1].strip()
88
 
 
89
  common_prefixes_to_remove = [
90
  "The answer is", "My answer is", "Based on the information", "The final answer is",
91
  "Here is the answer", "I found that", "It seems that"
92
+ ]
93
  for prefix in common_prefixes_to_remove:
94
  if answer.lower().startswith(prefix.lower()):
95
  answer = answer[len(prefix):].strip()
 
96
  if answer.startswith(":") or answer.startswith("."):
97
  answer = answer[1:].strip()
98
+ break
99
+
100
+ # Remove "Final Answer:" if present (as per GAIA guidelines for submission)
101
+ if "Final Answer:" in answer:
102
+ answer = answer.split("Final Answer:")[-1].strip()
103
 
104
  print(f"Agent (HF) LLM raw response (first 80 chars): {response_text[:80]}...")
105
  print(f"Agent (HF) cleaned answer (first 80 chars): {answer[:80]}...")
106
 
107
  if not answer:
108
  print("Warning: Agent (HF) produced an empty answer after cleaning.")
109
+ return "AGENT_ERROR: LLM produced an empty answer."
110
 
111
  return answer
112
  except Exception as e:
113
  print(f"Error during LLM call for question '{question[:50]}...': {e}")
114
+ # Check if the error is the specific AttributeError again
115
+ if isinstance(e, AttributeError) and "'InferenceClient' object has no attribute 'post'" in str(e):
116
+ return (f"AGENT_ERROR: LLM call failed. ({type(e).__name__}: {str(e)}). "
117
+ "This often indicates an issue with the 'huggingface_hub' library version. "
118
+ "Please ensure 'huggingface-hub>=0.20.2' is in your requirements.txt.")
119
  return f"AGENT_ERROR: LLM call failed. ({type(e).__name__}: {str(e)})"
120
 
121
 
122
+ # --- The rest of your Gradio app code (run_and_submit_all, UI blocks) remains the same ---
123
+ # Make sure to copy the BasicAgent class above into your app.py
124
+
125
  def run_and_submit_all(profile: gr.OAuthProfile | None):
126
  """
127
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
133
  username = f"{profile.username}"
134
  print(f"User logged in: {username}")
135
  else:
136
+ # If running locally without login for testing, you can set a default username
137
+ # For submission to the leaderboard, login is required.
138
+ # username = "local-test-user"
139
  print("User not logged in.")
140
+ return "Please Login to Hugging Face with the button to submit.", None
141
+
142
 
143
  api_url = DEFAULT_API_URL
144
  questions_url = f"{api_url}/questions"
 
146
 
147
  # 1. Instantiate Agent
148
  try:
149
+ # Pass the HF token if available from secrets, or let the agent find it
150
+ # No explicit token passing here as the agent handles os.getenv
151
+ agent = BasicAgent()
152
  except Exception as e:
153
  print(f"Error instantiating agent: {e}")
154
+ # Return the more detailed error from agent init if it fails
155
  return f"Error initializing agent: {str(e)}", None
156
 
157
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run_no_space_id"
 
172
  return f"Error fetching questions: {e}", None
173
  except requests.exceptions.JSONDecodeError as e:
174
  print(f"Error decoding JSON response from questions endpoint: {e}")
175
+ print(f"Response text: {response.text[:500]}") # Log part of the response
176
  return f"Error decoding server response for questions: {e}", None
177
+ except Exception as e: # Catch any other unexpected errors
178
  print(f"An unexpected error occurred fetching questions: {e}")
179
  return f"An unexpected error occurred fetching questions: {e}", None
180
 
181
+
182
  # 3. Run your Agent
183
  results_log = []
184
  answers_payload = []
 
186
  for i, item in enumerate(questions_data):
187
  task_id = item.get("task_id")
188
  question_text = item.get("question")
189
+
190
+ if not task_id or question_text is None: # More robust check
191
  print(f"Skipping item with missing task_id or question: {item}")
192
  continue
193
 
194
  print(f"\nProcessing question {i+1}/{len(questions_data)}, Task ID: {task_id}")
195
  try:
 
196
  submitted_answer = agent(question_text, task_id=task_id)
197
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
198
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
199
+ except Exception as e: # Catch errors from the agent call itself
200
  print(f"Error running agent on task {task_id}: {e}")
201
  error_answer = f"AGENT_RUNTIME_ERROR: {type(e).__name__}"
202
  answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
203
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
204
 
205
 
206
+ if not answers_payload: # Handle case where no answers were generated
207
  print("Agent did not produce any answers to submit.")
208
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log) # Return empty df
209
 
210
  # 4. Prepare Submission
211
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
215
  # 5. Submit
216
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
217
  try:
218
+ # Increased timeout for submission as well, server might be busy
219
+ response = requests.post(submit_url, json=submission_data, timeout=120)
220
  response.raise_for_status()
221
  result_data = response.json()
222
  final_status = (
 
232
  except requests.exceptions.HTTPError as e:
233
  error_detail = f"Server responded with status {e.response.status_code}."
234
  try:
235
+ error_json = e.response.json() # Try to get JSON error detail
236
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
237
+ except requests.exceptions.JSONDecodeError: # If response is not JSON
238
+ error_detail += f" Response: {e.response.text[:500]}" # Log first 500 chars
239
  status_message = f"Submission Failed: {error_detail}"
240
  print(status_message)
241
  results_df = pd.DataFrame(results_log)
 
245
  print(status_message)
246
  results_df = pd.DataFrame(results_log)
247
  return status_message, results_df
248
+ except requests.exceptions.RequestException as e: # Catch other requests errors
249
  status_message = f"Submission Failed: Network error - {e}"
250
  print(status_message)
251
  results_df = pd.DataFrame(results_log)
252
  return status_message, results_df
253
+ except Exception as e: # Catch any other unexpected errors during submission
254
  status_message = f"An unexpected error occurred during submission: {e}"
255
  print(status_message)
256
  results_df = pd.DataFrame(results_log)
257
  return status_message, results_df
258
 
 
259
  # --- Build Gradio Interface using Blocks ---
260
  with gr.Blocks() as demo:
261
  gr.Markdown("# Basic Agent Evaluation Runner")
 
263
  """
264
  **Instructions:**
265
  1. This Space uses a `BasicAgent` with an LLM from HuggingFace Hub. Ensure you have set your `HUGGINGFACEHUB_API_TOKEN` or `HF_TOKEN` in the Space secrets for the LLM to work.
266
+ 2. **Crucial:** Ensure your `requirements.txt` file includes `huggingface-hub>=0.20.2` to prevent common LLM call errors.
267
+ 3. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
268
+ 4. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
269
  ---
270
  **Disclaimers:**
271
  Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions using an LLM).
272
  This space provides a basic setup. For better GAIA scores, you might need to:
273
  - Choose a more powerful LLM (e.g., from the `llm_repo_id` options in `BasicAgent` or others).
274
  - Implement a proper ReAct loop with tool parsing and execution.
275
+ - Implement actual tool usage (e.g., fetching files via `/files/{task_id}`, using a calculator, web search, vision models). The current agent is purely LLM-based and cannot use external tools or files.
276
  """
277
  )
 
 
278
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  login_button = gr.LoginButton()
280
 
281
  run_button = gr.Button("Run Evaluation & Submit All Answers")
282
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
283
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
284
 
 
 
 
 
285
  run_button.click(
286
  fn=run_and_submit_all,
287
+ # Gradio automatically passes gr.OAuthProfile if type-hinted and user is logged in
 
 
288
  outputs=[status_output, results_table]
289
  )
290
 
 
306
  else:
307
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
308
 
 
309
  if not (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")):
310
  print("⚠️ WARNING: HUGGINGFACEHUB_API_TOKEN or HF_TOKEN environment variable not found.")
311
  print(" The LLM agent will likely fail to initialize. Please set this token in your Space secrets.")
312
+ else:
313
+ print("✅ HUGGINGFACEHUB_API_TOKEN or HF_TOKEN found (or assumed to be set).")
314
 
315
+ # Check for huggingface_hub version at startup (informative, actual check is in requirements.txt)
316
+ try:
317
+ import huggingface_hub
318
+ print(f"✅ Found huggingface_hub version: {huggingface_hub.__version__}")
319
+ if tuple(map(int, huggingface_hub.__version__.split('.')[:3])) < (0, 20, 2):
320
+ print("⚠️ WARNING: Your huggingface_hub version is older than 0.20.2. "
321
+ "This might lead to errors. Please update it in requirements.txt to 'huggingface-hub>=0.20.2'.")
322
+ except ImportError:
323
+ print("⚠️ WARNING: huggingface_hub library not found. Please add it to requirements.txt.")
324
+ except Exception as e:
325
+ print(f"ℹ️ Could not determine huggingface_hub version: {e}")
326
 
 
 
 
 
327
 
328
  print("-"*(60 + len(" App Starting ")) + "\n")
329
  print("Launching Gradio Interface for Basic Agent Evaluation...")
330
+ demo.launch(debug=True, share=False) # debug=True can be helpful for local dev