itsskofficial committed on
Commit
b6137ed
·
1 Parent(s): a155b49

changed model

Browse files
Files changed (2) hide show
  1. app.py +92 -149
  2. requirements.txt +2 -1
app.py CHANGED
@@ -5,19 +5,19 @@ import pandas as pd
5
  import re
6
  import io
7
  import contextlib
 
8
  from huggingface_hub import InferenceClient
9
  from langchain_community.tools import DuckDuckGoSearchRun
10
  from PyPDF2 import PdfReader
11
  from docx import Document
12
- import json
13
 
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
- # A powerful, open-source model with function-calling capabilities
17
- MODEL_ID = "NousResearch/Hermes-2-Pro-Mistral-7B"
18
- # This prompt template is inspired by the ReAct framework and is tailored for tool use.
19
- PROMPT_TEMPLATE = """<|im_start|>system
20
- You are a helpful assistant designed to answer questions accurately. You have access to the following tools:
21
 
22
  {tools_description}
23
 
@@ -32,10 +32,10 @@ When you have the final answer, respond with:
32
  Thought: I have now found the final answer.
33
  Final Answer: The final answer.
34
 
35
- Do not use a tool if you are not sure about the parameters. Do not make up file names.
36
- Question: {question}<|im_end|>
37
- <|im_start|>assistant
38
- {scratchpad}"""
39
 
40
 
41
  # --- Tool Definitions ---
@@ -46,13 +46,6 @@ class WebSearchTool:
46
  self.search = DuckDuckGoSearchRun()
47
 
48
  def __call__(self, query: str):
49
- """
50
- Searches the web for the given query.
51
- Args:
52
- query (str): The search query.
53
- Returns:
54
- str: The search results.
55
- """
56
  print(f"--- Calling WebSearchTool with query: {query} ---")
57
  try:
58
  return self.search.run(query)
@@ -66,16 +59,9 @@ class WebSearchTool:
66
  class PythonREPLTool:
67
  """A tool to execute Python code."""
68
  def __call__(self, code: str):
69
- """
70
- Executes Python code and returns the output.
71
- Args:
72
- code (str): The Python code to execute.
73
- Returns:
74
- str: The output of the executed code.
75
- """
76
  print(f"--- Calling PythonREPLTool with code: {code} ---")
77
- if "os" in code or "sys" in code or "subprocess" in code:
78
- return "Error: Use of os, sys, or subprocess is not allowed."
79
 
80
  local_vars = {}
81
  string_io = io.StringIO()
@@ -84,7 +70,6 @@ class PythonREPLTool:
84
  exec(code, {}, local_vars)
85
  output = string_io.getvalue()
86
  if not output and local_vars:
87
- # If there was no print statement, return the value of the last variable
88
  output = str(list(local_vars.values())[-1])
89
  return output if output else "Code executed with no output."
90
  except Exception as e:
@@ -100,32 +85,24 @@ class FileReaderTool:
100
  self.api_url = api_url
101
 
102
  def __call__(self, task_id: str, file_name: str):
103
- """
104
- Reads the content of a file.
105
- Args:
106
- task_id (str): The ID of the task the file is associated with.
107
- file_name (str): The name of the file to read. The LLM must infer this from the question.
108
- Returns:
109
- str: The content of the file.
110
- """
111
  print(f"--- Calling FileReaderTool for task_id: {task_id}, file_name: {file_name} ---")
 
 
 
112
  file_url = f"{self.api_url}/files/{task_id}"
113
 
114
  try:
115
  response = requests.get(file_url, timeout=20)
116
  response.raise_for_status()
117
-
118
- content = ""
119
  file_content = io.BytesIO(response.content)
120
 
 
121
  if file_name.endswith('.pdf'):
122
  pdf = PdfReader(file_content)
123
- for page in pdf.pages:
124
- content += page.extract_text() if page.extract_text() else ""
125
  elif file_name.endswith('.docx'):
126
  doc = Document(file_content)
127
- for para in doc.paragraphs:
128
- content += para.text + '\n'
129
  elif file_name.endswith('.csv'):
130
  df = pd.read_csv(file_content)
131
  content = df.to_string()
@@ -136,91 +113,114 @@ class FileReaderTool:
136
  content = file_content.read().decode('utf-8')
137
  else:
138
  return f"Error: Unsupported file type for '{file_name}'. Supported types: .pdf, .docx, .csv, .json, .txt."
139
-
140
  return content if content else "File is empty."
141
-
142
- except requests.exceptions.RequestException as e:
143
- return f"Error downloading file: {e}"
144
  except Exception as e:
145
  return f"Error reading file '{file_name}': {e}"
146
 
147
  @property
148
  def description(self):
149
- return 'file_reader(task_id: str, file_name: str) -> str - Reads the content of a file associated with the current task. Use the file name mentioned in the question.'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
 
152
  # --- GAIA Agent Definition ---
153
  class GaiaAgent:
154
  def __init__(self, hf_token: str, api_url: str, max_turns: int = 8):
155
- print("GaiaAgent initializing...")
156
  if not hf_token:
157
  raise ValueError("Hugging Face token is required for the Inference API.")
158
-
159
  self.llm_client = InferenceClient(model=MODEL_ID, token=hf_token)
160
  self.max_turns = max_turns
161
 
162
- # Initialize tools
163
  self.tools = {
164
  "web_search": WebSearchTool(),
165
  "python_repl": PythonREPLTool(),
166
  "file_reader": FileReaderTool(api_url=api_url),
 
 
167
  }
168
  self.tools_description = "\n".join([f"- `{tool.description}`" for tool in self.tools.values()])
169
  self.tool_names = ", ".join(self.tools.keys())
170
- print("GaiaAgent initialized successfully.")
171
 
172
  def __call__(self, question: str, task_id: str) -> str:
173
  print(f"\n--- Running agent on task {task_id} ---")
174
  print(f"Question: {question[:100]}...")
175
-
176
  scratchpad = ""
177
 
178
  for turn in range(self.max_turns):
179
  print(f"Turn {turn + 1}/{self.max_turns}")
180
-
181
- # 1. Construct the prompt
182
  prompt = PROMPT_TEMPLATE.format(
183
  tools_description=self.tools_description,
184
  tool_names=self.tool_names,
185
  question=question,
186
  scratchpad=scratchpad,
187
  )
188
-
189
- # 2. Call the LLM
190
  try:
191
  llm_output = self.llm_client.text_generation(
192
- prompt, max_new_tokens=1024, stop_sequences=["<|im_end|>", "Observation:"], temperature=0.1
193
  ).strip()
194
  except Exception as e:
195
- print(f"LLM API call failed: {e}")
196
  return f"Error: LLM call failed. {e}"
197
 
198
  print(f"LLM Output:\n{llm_output}")
199
  scratchpad += llm_output
200
-
201
- # 3. Parse the output for Final Answer or Action
202
  final_answer_match = re.search(r"Final Answer:\s*(.*)", scratchpad, re.DOTALL)
203
- action_match = re.search(r"Action:\s*([a-zA-Z0-9_]+)\((.*)\)", llm_output)
204
 
205
  if final_answer_match:
206
- answer = final_answer_match.group(1).strip()
207
- print(f"Final Answer Found: {answer}")
208
- return answer
209
 
210
  elif action_match:
211
  tool_name = action_match.group(1).strip()
212
  tool_args_str = action_match.group(2).strip()
213
 
214
  if tool_name not in self.tools:
215
- observation = f"Error: Unknown tool '{tool_name}'. Available tools: {self.tool_names}"
216
  else:
217
  try:
218
- # Safely parse arguments
219
  args_dict = eval(f"dict({tool_args_str})", {"__builtins__": None}, {})
220
-
221
- if tool_name == 'file_reader':
222
  args_dict['task_id'] = task_id
223
-
224
  tool = self.tools[tool_name]
225
  observation = tool(**args_dict)
226
  except Exception as e:
@@ -229,102 +229,67 @@ class GaiaAgent:
229
  print(f"Observation: {str(observation)[:200]}...")
230
  scratchpad += f"\nObservation: {str(observation)}\n"
231
  else:
232
- print("No valid action or final answer found in LLM output. Continuing thought process.")
233
- scratchpad += "\nObservation: No valid action taken. Please either use a tool with the correct format `Action: tool_name(arg_name=\"value\")` or provide the final answer in the format `Final Answer: your_answer`."
234
 
235
- print("Agent reached max turns.")
236
  return "Agent stopped after reaching maximum turns."
237
 
238
  # --- Main Submission Logic ---
239
-
240
  def run_and_submit_all(profile: gr.OAuthProfile | None):
241
  hf_token = os.getenv("HF_TOKEN")
242
  if not hf_token:
243
- return "Error: `HF_TOKEN` environment variable not set. Please add it to your Space secrets.", None
244
 
245
  space_id = os.getenv("SPACE_ID")
246
  if not space_id:
247
- return "Error: `SPACE_ID` environment variable not found. Are you running in a Hugging Face Space?", None
248
 
249
  if not profile:
250
  return "Please Login to Hugging Face with the button to submit.", None
251
 
252
  username = profile.username
253
- print(f"User logged in: {username}")
254
-
255
  api_url = DEFAULT_API_URL
256
- questions_url = f"{api_url}/questions"
257
- submit_url = f"{api_url}/submit"
258
 
259
- # 1. Instantiate Agent
260
  try:
261
  agent = GaiaAgent(hf_token=hf_token, api_url=api_url)
262
  except Exception as e:
263
- print(f"Error instantiating agent: {e}")
264
  return f"Error initializing agent: {e}", None
265
 
266
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
267
- print(f"Code link: {agent_code}")
268
-
269
- # 2. Fetch Questions
270
  try:
271
- response = requests.get(questions_url, timeout=15)
272
  response.raise_for_status()
273
  questions_data = response.json()
274
- if not questions_data:
275
- return "Fetched questions list is empty or invalid format.", None
276
- print(f"Fetched {len(questions_data)} questions.")
277
  except Exception as e:
278
  return f"Error fetching questions: {e}", None
279
 
280
- # 3. Run Agent and Collect Answers
281
- results_log = []
282
- answers_payload = []
283
- print(f"Running agent on {len(questions_data)} questions...")
284
  for item in questions_data:
285
- task_id = item.get("task_id")
286
- question_text = item.get("question")
287
- if not task_id or question_text is None:
288
- continue
289
  try:
290
- submitted_answer = agent(question_text, task_id)
291
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
292
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
293
  except Exception as e:
294
- print(f"Error running agent on task {task_id}: {e}")
295
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
296
 
297
  if not answers_payload:
298
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
299
 
300
- # 4. Prepare and 5. Submit
301
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
302
- print(f"Submitting {len(answers_payload)} answers for user '{username}'...")
303
 
304
  try:
305
- response = requests.post(submit_url, json=submission_data, timeout=120)
306
  response.raise_for_status()
307
  result_data = response.json()
308
  final_status = (
309
- f"Submission Successful!\n"
310
- f"User: {result_data.get('username')}\n"
311
- f"Overall Score: {result_data.get('score', 'N/A')}% "
312
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
313
- f"Message: {result_data.get('message', 'No message received.')}"
314
  )
315
- results_df = pd.DataFrame(results_log)
316
- return final_status, results_df
317
  except requests.exceptions.RequestException as e:
318
- error_detail = "Network error or server responded with an error."
319
- if e.response is not None:
320
- error_detail = f"Server responded with status {e.response.status_code}. Response: {e.response.text[:500]}"
321
- status_message = f"Submission Failed: {error_detail}"
322
- results_df = pd.DataFrame(results_log)
323
- return status_message, results_df
324
- except Exception as e:
325
- status_message = f"An unexpected error occurred during submission: {e}"
326
- results_df = pd.DataFrame(results_log)
327
- return status_message, results_df
328
 
329
 
330
  # --- Gradio Interface ---
@@ -333,41 +298,19 @@ with gr.Blocks() as demo:
333
  gr.Markdown(
334
  """
335
  **Instructions:**
336
-
337
- 1. **Add your HF Token**: Go to the 'Settings' tab of this Space and add a secret named `HF_TOKEN` with your Hugging Face read token.
338
- 2. **Login**: Log in to your Hugging Face account using the button below. This is required for submission.
339
- 3. **Run**: Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
340
- ---
341
- **Disclaimer:**
342
- This process can take several minutes as the agent processes each question. Please be patient.
343
  """
344
  )
345
-
346
  with gr.Row():
347
  gr.LoginButton()
348
  run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
349
-
350
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
351
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
352
-
353
- run_button.click(
354
- fn=run_and_submit_all,
355
- outputs=[status_output, results_table]
356
- )
357
 
358
  if __name__ == "__main__":
359
- print("\n" + "-"*30 + " App Starting " + "-"*30)
360
  if not os.getenv("HF_TOKEN"):
361
- print("⚠️ WARNING: `HF_TOKEN` secret not found. The agent will not be able to run.")
362
- else:
363
- print("✅ `HF_TOKEN` secret found.")
364
-
365
- space_id_startup = os.getenv("SPACE_ID")
366
- if space_id_startup:
367
- print(f"✅ SPACE_ID found: {space_id_startup}")
368
- else:
369
- print("ℹ️ SPACE_ID environment variable not found (running locally?).")
370
-
371
- print("-"*(60 + len(" App Starting ")) + "\n")
372
- print("Launching Gradio Interface for GAIA Agent Evaluation...")
373
  demo.launch()
 
5
  import re
6
  import io
7
  import contextlib
8
+ import json
9
  from huggingface_hub import InferenceClient
10
  from langchain_community.tools import DuckDuckGoSearchRun
11
  from PyPDF2 import PdfReader
12
  from docx import Document
13
+ from youtube_transcript_api import YouTubeTranscriptApi
14
 
15
  # --- Constants ---
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
+ # Switched to a more reliable and fast model available on the free Inference API
18
+ MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
19
+ # Updated prompt template to match the Mistral format
20
+ PROMPT_TEMPLATE = """<s>[INST]You are a helpful assistant designed to answer questions accurately. You have access to the following tools:
 
21
 
22
  {tools_description}
23
 
 
32
  Thought: I have now found the final answer.
33
  Final Answer: The final answer.
34
 
35
+ Do not use a tool if you are not sure about the parameters. Do not make up file names. If a tool is not available for a task (e.g., image analysis), state that you cannot answer.
36
+
37
+ Question: {question}
38
+ [/INST]{scratchpad}"""
39
 
40
 
41
  # --- Tool Definitions ---
 
46
  self.search = DuckDuckGoSearchRun()
47
 
48
  def __call__(self, query: str):
 
 
 
 
 
 
 
49
  print(f"--- Calling WebSearchTool with query: {query} ---")
50
  try:
51
  return self.search.run(query)
 
59
  class PythonREPLTool:
60
  """A tool to execute Python code."""
61
  def __call__(self, code: str):
 
 
 
 
 
 
 
62
  print(f"--- Calling PythonREPLTool with code: {code} ---")
63
+ if any(keyword in code for keyword in ["os", "sys", "subprocess", "eval", "exec"]):
64
+ return "Error: Use of os, sys, subprocess, eval, or exec is not allowed for security reasons."
65
 
66
  local_vars = {}
67
  string_io = io.StringIO()
 
70
  exec(code, {}, local_vars)
71
  output = string_io.getvalue()
72
  if not output and local_vars:
 
73
  output = str(list(local_vars.values())[-1])
74
  return output if output else "Code executed with no output."
75
  except Exception as e:
 
85
  self.api_url = api_url
86
 
87
  def __call__(self, task_id: str, file_name: str):
 
 
 
 
 
 
 
 
88
  print(f"--- Calling FileReaderTool for task_id: {task_id}, file_name: {file_name} ---")
89
+ if file_name.endswith(('.mp3', '.wav', '.flac')):
90
+ return "Error: This tool cannot read audio files. Use the `audio_transcriber` tool instead."
91
+
92
  file_url = f"{self.api_url}/files/{task_id}"
93
 
94
  try:
95
  response = requests.get(file_url, timeout=20)
96
  response.raise_for_status()
 
 
97
  file_content = io.BytesIO(response.content)
98
 
99
+ content = ""
100
  if file_name.endswith('.pdf'):
101
  pdf = PdfReader(file_content)
102
+ content = "".join(page.extract_text() for page in pdf.pages if page.extract_text())
 
103
  elif file_name.endswith('.docx'):
104
  doc = Document(file_content)
105
+ content = "\n".join(para.text for para in doc.paragraphs)
 
106
  elif file_name.endswith('.csv'):
107
  df = pd.read_csv(file_content)
108
  content = df.to_string()
 
113
  content = file_content.read().decode('utf-8')
114
  else:
115
  return f"Error: Unsupported file type for '{file_name}'. Supported types: .pdf, .docx, .csv, .json, .txt."
 
116
  return content if content else "File is empty."
 
 
 
117
  except Exception as e:
118
  return f"Error reading file '{file_name}': {e}"
119
 
120
  @property
121
  def description(self):
122
+ return 'file_reader(task_id: str, file_name: str) -> str - Reads content of text-based files (.pdf, .docx, .csv, .json, .txt). For audio, use audio_transcriber.'
123
+
124
class AudioTranscriptionTool:
    """Transcribe a task's audio attachment through the Hugging Face Inference API.

    The audio bytes are downloaded from ``{api_url}/files/{task_id}`` and handed
    to the supplied inference client for speech recognition.
    """

    # The speech-recognition model is named explicitly on every request. The
    # client handed to this tool is the agent's LLM client, and without an
    # override the audio would be sent to whatever model that client is bound to.
    DEFAULT_ASR_MODEL = "openai/whisper-large-v3"

    def __init__(self, api_url: str, client: "InferenceClient", model: str = DEFAULT_ASR_MODEL):
        """Store the file-server base URL, the inference client and the ASR model id.

        Args:
            api_url: Base URL of the service that serves task files.
            client: Inference client used to run speech recognition.
            model: Model id passed to the client for transcription.
        """
        self.api_url = api_url
        self.client = client
        self.model = model

    def __call__(self, task_id: str, file_name: str):
        """Download the audio attached to ``task_id`` and return its transcription.

        Args:
            task_id: Identifier of the task whose file is downloaded.
            file_name: Name of the audio file. Only used for logging; the
                download URL is built from ``task_id`` alone.

        Returns:
            The transcribed text, or a human-readable error message. Failures
            are returned as strings rather than raised so the agent can feed
            them back to the model as an observation.
        """
        print(f"--- Calling AudioTranscriptionTool for task: {task_id}, file: {file_name} ---")
        file_url = f"{self.api_url}/files/{task_id}"
        try:
            response = requests.get(file_url, timeout=30)
            response.raise_for_status()
            transcription = self.client.automatic_speech_recognition(
                response.content, model=self.model
            )
            # NOTE(review): some client versions appear to return the text
            # directly as a string rather than a mapping with a 'text' key;
            # both shapes are accepted here.
            if isinstance(transcription, str):
                text = transcription
            elif transcription and 'text' in transcription:
                text = transcription['text']
            else:
                text = None
        except Exception as e:
            return f"Error during audio transcription: {e}"
        # An empty transcription is reported explicitly instead of being
        # returned as a blank observation.
        return text if text else "Could not transcribe audio."

    @property
    def description(self):
        """One-line tool signature and usage hint shown to the LLM."""
        return 'audio_transcriber(task_id: str, file_name: str) -> str - Transcribes an audio file (.mp3, .wav) associated with the current task.'
145
+
146
class YouTubeTranscriptTool:
    """Fetch the transcript of a YouTube video."""

    # Recognised URL shapes: ``watch?v=ID`` (with ``v`` anywhere in the query
    # string), ``youtu.be/ID``, and the ``/embed/``, ``/shorts/`` and ``/live/``
    # paths. Requiring ``?`` or ``&`` before ``v=`` keeps unrelated parameters
    # that merely end in "v" (e.g. ``rev=``) from being mistaken for a video id.
    _VIDEO_ID_PATTERN = re.compile(
        r"(?:[?&]v=|youtu\.be/|/(?:embed|shorts|live)/)([a-zA-Z0-9_-]+)"
    )

    @classmethod
    def _extract_video_id(cls, video_url):
        """Return the video id found in ``video_url``, or None if there is none."""
        if not isinstance(video_url, str):
            return None
        match = cls._VIDEO_ID_PATTERN.search(video_url)
        return match.group(1) if match else None

    def __call__(self, video_url: str):
        """Return the transcript text for the video at ``video_url``.

        Args:
            video_url: A YouTube link in any of the recognised URL shapes.

        Returns:
            The transcript segments joined by single spaces, or a
            human-readable error message. Failures are returned as strings
            rather than raised so the agent can feed them back to the model.
        """
        print(f"--- Calling YouTubeTranscriptTool for URL: {video_url} ---")
        video_id = self._extract_video_id(video_url)
        if video_id is None:
            return "Error: Invalid YouTube URL. Could not extract video ID."
        try:
            # NOTE(review): `get_transcript` is the library's older static
            # entry point; confirm it exists in the installed version, since
            # the dependency is unpinned.
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
            return " ".join(segment['text'] for segment in transcript_list)
        except Exception as e:
            return f"Error fetching transcript for video {video_id}: {e}. The video might not have a transcript."

    @property
    def description(self):
        """One-line tool signature and usage hint shown to the LLM."""
        return 'youtube_transcript_fetcher(video_url: str) -> str - Fetches the transcript of a YouTube video. Use for questions about video content.'
163
 
164
 
165
  # --- GAIA Agent Definition ---
166
  class GaiaAgent:
167
  def __init__(self, hf_token: str, api_url: str, max_turns: int = 8):
 
168
  if not hf_token:
169
  raise ValueError("Hugging Face token is required for the Inference API.")
 
170
  self.llm_client = InferenceClient(model=MODEL_ID, token=hf_token)
171
  self.max_turns = max_turns
172
 
 
173
  self.tools = {
174
  "web_search": WebSearchTool(),
175
  "python_repl": PythonREPLTool(),
176
  "file_reader": FileReaderTool(api_url=api_url),
177
+ "youtube_transcript_fetcher": YouTubeTranscriptTool(),
178
+ "audio_transcriber": AudioTranscriptionTool(api_url=api_url, client=self.llm_client),
179
  }
180
  self.tools_description = "\n".join([f"- `{tool.description}`" for tool in self.tools.values()])
181
  self.tool_names = ", ".join(self.tools.keys())
182
+ print("GaiaAgent initialized successfully with tools:", self.tool_names)
183
 
184
  def __call__(self, question: str, task_id: str) -> str:
185
  print(f"\n--- Running agent on task {task_id} ---")
186
  print(f"Question: {question[:100]}...")
 
187
  scratchpad = ""
188
 
189
  for turn in range(self.max_turns):
190
  print(f"Turn {turn + 1}/{self.max_turns}")
 
 
191
  prompt = PROMPT_TEMPLATE.format(
192
  tools_description=self.tools_description,
193
  tool_names=self.tool_names,
194
  question=question,
195
  scratchpad=scratchpad,
196
  )
 
 
197
  try:
198
  llm_output = self.llm_client.text_generation(
199
+ prompt, max_new_tokens=1024, stop_sequences=["Observation:", "[/INST]"], temperature=0.1
200
  ).strip()
201
  except Exception as e:
 
202
  return f"Error: LLM call failed. {e}"
203
 
204
  print(f"LLM Output:\n{llm_output}")
205
  scratchpad += llm_output
206
+
 
207
  final_answer_match = re.search(r"Final Answer:\s*(.*)", scratchpad, re.DOTALL)
208
+ action_match = re.search(r"Action:\s*([a-zA-Z0-9_]+)\((.*)\)", llm_output, re.DOTALL)
209
 
210
  if final_answer_match:
211
+ return final_answer_match.group(1).strip()
 
 
212
 
213
  elif action_match:
214
  tool_name = action_match.group(1).strip()
215
  tool_args_str = action_match.group(2).strip()
216
 
217
  if tool_name not in self.tools:
218
+ observation = f"Error: Unknown tool '{tool_name}'."
219
  else:
220
  try:
 
221
  args_dict = eval(f"dict({tool_args_str})", {"__builtins__": None}, {})
222
+ if tool_name in ['file_reader', 'audio_transcriber']:
 
223
  args_dict['task_id'] = task_id
 
224
  tool = self.tools[tool_name]
225
  observation = tool(**args_dict)
226
  except Exception as e:
 
229
  print(f"Observation: {str(observation)[:200]}...")
230
  scratchpad += f"\nObservation: {str(observation)}\n"
231
  else:
232
+ scratchpad += "\nObservation: No valid action or final answer found. Please format your response as either 'Action: tool_name(args)' or 'Final Answer: your_answer'."
 
233
 
 
234
  return "Agent stopped after reaching maximum turns."
235
 
236
  # --- Main Submission Logic ---
 
237
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question from the scoring API and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user; a falsy value means
            nobody is logged in and the run is refused.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per attempted question, or None when the run stops before any
        question is attempted (missing configuration, no login, agent or
        question-fetch failure).
    """
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        return "Error: `HF_TOKEN` secret not set. Please add it to your Space secrets.", None

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        return "Error: `SPACE_ID` not found. Are you in a Hugging Face Space?", None

    if not profile:
        return "Please Login to Hugging Face with the button to submit.", None

    username = profile.username
    api_url = DEFAULT_API_URL
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        agent = GaiaAgent(hf_token=hf_token, api_url=api_url)
    except Exception as e:
        return f"Error initializing agent: {e}", None

    try:
        response = requests.get(f"{api_url}/questions", timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    results_log, answers_payload = [], []
    for item in questions_data:
        task_id, question_text = item.get("task_id"), item.get("question")
        if not all([task_id, question_text]):
            continue
        try:
            answer = agent(question_text, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
        except Exception as e:
            # A failed question is logged for display but not submitted.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        return "Agent did not produce any answers.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}

    try:
        response = requests.post(f"{api_url}/submit", json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful! Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')})"
        )
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.RequestException as e:
        # Compare against None explicitly: a Response object is falsy for
        # 4xx/5xx statuses, so a plain truthiness test would discard the
        # status code and body for exactly the failures that carry them.
        if e.response is not None:
            error_detail = f"Server responded with status {e.response.status_code}. Response: {e.response.text[:500]}"
        else:
            error_detail = str(e)
        return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
    except Exception as e:
        # Anything else (e.g. an unparsable response body) is still reported
        # alongside the per-question log instead of escaping the UI callback.
        return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
293
 
294
 
295
  # --- Gradio Interface ---
 
298
  gr.Markdown(
299
  """
300
  **Instructions:**
301
+ 1. **Add your HF Token**: Go to your Space's **Settings** and add a secret named `HF_TOKEN` with your Hugging Face `read` token.
302
+ 2. **Login**: Use the button below to login with your Hugging Face account.
303
+ 3. **Run**: Click 'Run Evaluation & Submit' to start the agent. This may take several minutes.
 
 
 
 
304
  """
305
  )
 
306
  with gr.Row():
307
  gr.LoginButton()
308
  run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
309
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=4, interactive=False)
 
310
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
311
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
 
312
 
313
if __name__ == "__main__":
    # A missing token does not block startup; the warning only surfaces the
    # misconfiguration in the logs before the interface is launched.
    token_configured = bool(os.getenv("HF_TOKEN"))
    if not token_configured:
        print("⚠️ WARNING: `HF_TOKEN` secret not found. The agent will not run.")
    demo.launch()
requirements.txt CHANGED
@@ -6,4 +6,5 @@ duckduckgo-search
6
  pypdf2
7
  python-docx
8
  pandas
9
- openpyxl
 
 
6
  pypdf2
7
  python-docx
8
  pandas
9
+ openpyxl
10
+ youtube-transcript-api