GilbertoEwaldFilho committed on
Commit
0f0f5ed
·
verified ·
1 Parent(s): 49ab2c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -71
app.py CHANGED
@@ -1,10 +1,12 @@
1
  import os
2
  import re
 
3
  import requests
4
  import pandas as pd
5
  import gradio as gr
6
 
7
  from huggingface_hub import InferenceClient
 
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -15,12 +17,11 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
  # =========================================================
16
  def clean_answer(text: str) -> str:
17
  """
18
- Limpa a resposta retornada pelo modelo:
19
  - remove quebras de linha
20
  - remove 'final answer', 'answer:', etc
21
  - remove aspas externas
22
  - normaliza espaços
23
- NÃO apaga o conteúdo útil.
24
  """
25
  if not text:
26
  return ""
@@ -38,96 +39,177 @@ def clean_answer(text: str) -> str:
38
 
39
  text = text.replace("\n", " ").strip()
40
 
41
- # aspas externas
42
- if len(text) >= 2 and text[0] == text[-1] and text[0] in ['"', "'"]:
 
43
  text = text[1:-1].strip()
44
 
45
  text = re.sub(r"\s+", " ", text)
46
-
47
  return text.strip()
48
 
49
 
50
  # =========================================================
51
- # Prompt base para o agente
52
  # =========================================================
53
- SYSTEM_PROMPT = (
54
- "You are an AI agent solving GAIA-style questions.\n"
55
- "You have access to a web search tool (DuckDuckGoSearchTool).\n"
56
- "For each question, you MUST search the web when needed to obtain accurate, "
57
- "up-to-date factual information before answering.\n"
58
- "Use the search tool, read the results, reason, and then produce ONLY the final answer.\n"
59
- "Do NOT output explanations, steps, reasoning, citations, links, or any extra words.\n"
60
- "Do NOT output labels like 'Final answer', 'Answer:', etc.\n"
61
- "If the answer is a number, output just the number. "
62
- "If it is a word or short phrase, output just that.\n"
63
- "Your output will be compared to the ground truth using EXACT MATCH."
64
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
  # =========================================================
68
- # Basic Agent Definition – usando smolagents
69
  # =========================================================
70
-
71
  class BasicAgent:
72
  """
73
- Agente simples usando InferenceClient.chat_completion
74
- para responder as questões do GAIA em modo conversacional.
 
 
 
75
  """
76
 
77
  def __init__(self):
78
- print("Initializing Simple GAIA Agent with chat_completion...")
79
 
80
  hf_token = os.getenv("HF_TOKEN")
81
  if not hf_token:
82
  raise ValueError(
83
- "HF_TOKEN not found! Crie um Secret chamado HF_TOKEN em Settings → Variables."
84
  )
85
 
86
- # Modelo que sabemos ser suportado como 'conversational'
87
  self.client = InferenceClient(
88
- model="Qwen/Qwen2.5-72B-Instruct", # o mesmo que a infra mostrou no log
89
  token=hf_token,
90
  )
91
 
92
  self.system_instructions = (
93
  "You are solving GAIA benchmark questions.\n"
94
- "Rules:\n"
 
 
95
  "- Answer ONLY with the final answer.\n"
96
- "- No explanations, no reasoning, no extra words.\n"
97
  "- Do NOT write 'Final answer', 'Answer:', etc.\n"
98
  "- If the answer is a number, output just the number.\n"
99
- "- Your output will be compared with EXACT MATCH.\n"
100
  )
101
 
102
- def __call__(self, question: str) -> str:
103
  print(f"\n=== NEW QUESTION ===\n{question}\n")
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  messages = [
106
  {"role": "system", "content": self.system_instructions},
107
- {
108
- "role": "user",
109
- "content": (
110
- question
111
- + "\n\nRemember: reply ONLY with the final answer, nothing else."
112
- ),
113
- },
114
  ]
115
 
116
  try:
117
  completion = self.client.chat_completion(
118
  messages=messages,
119
- max_tokens=64,
120
  temperature=0.1,
121
  top_p=0.9,
122
  )
123
 
124
- # compatível com os dois formatos (.message["content"] ou .message.content)
125
  choice = completion.choices[0]
126
- message = choice.message
127
- if isinstance(message, dict):
128
- raw = message.get("content", "")
129
  else:
130
- raw = getattr(message, "content", "")
131
 
132
  print("RAW MODEL OUTPUT:", repr(raw))
133
  final = clean_answer(raw)
@@ -138,16 +220,16 @@ class BasicAgent:
138
  print("ERROR calling InferenceClient.chat_completion:", e)
139
  return ""
140
 
 
141
  # =========================================================
142
- # Runner + submit (mantido do template, usando BasicAgent novo)
143
  # =========================================================
144
  def run_and_submit_all(profile: gr.OAuthProfile | None):
145
  """
146
- Fetches all questions, runs the BasicAgent on them, submits all answers,
147
- and displays the results.
148
  """
149
- # --- Determine HF Space Runtime URL and Repo URL ---
150
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
151
 
152
  if profile:
153
  username = f"{profile.username}"
@@ -160,21 +242,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
160
  questions_url = f"{api_url}/questions"
161
  submit_url = f"{api_url}/submit"
162
 
163
- # 1. Instantiate Agent
164
  try:
165
  agent = BasicAgent()
166
  except Exception as e:
167
  print(f"Error instantiating agent: {e}")
168
  return f"Error initializing agent: {e}", None
169
 
170
- # Link para o código do agente (Space público)
171
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
172
  print(f"Agent code URL: {agent_code}")
173
 
174
- # 2. Fetch Questions
175
  print(f"Fetching questions from: {questions_url}")
176
  try:
177
- response = requests.get(questions_url, timeout=60) # timeout maior
178
  response.raise_for_status()
179
  questions_data = response.json()
180
  if not questions_data:
@@ -192,18 +273,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
192
  print(f"An unexpected error occurred fetching questions: {e}")
193
  return f"An unexpected error occurred fetching questions: {e}", None
194
 
195
- # 3. Run your Agent
196
  results_log = []
197
  answers_payload = []
198
  print(f"Running agent on {len(questions_data)} questions...")
 
199
  for item in questions_data:
200
  task_id = item.get("task_id")
201
  question_text = item.get("question")
202
  if not task_id or question_text is None:
203
  print(f"Skipping item with missing task_id or question: {item}")
204
  continue
 
205
  try:
206
- submitted_answer = agent(question_text)
 
 
207
  answers_payload.append(
208
  {"task_id": task_id, "submitted_answer": submitted_answer}
209
  )
@@ -228,7 +313,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
228
  print("Agent did not produce any answers to submit.")
229
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
230
 
231
- # 4. Prepare Submission
232
  submission_data = {
233
  "username": username.strip(),
234
  "agent_code": agent_code,
@@ -239,10 +324,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
239
  )
240
  print(status_update)
241
 
242
- # 5. Submit
243
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
244
  try:
245
- response = requests.post(submit_url, json=submission_data, timeout=60)
246
  response.raise_for_status()
247
  result_data = response.json()
248
  final_status = (
@@ -283,22 +368,23 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
283
  return status_message, results_df
284
 
285
 
286
- # --- Build Gradio Interface using Blocks ---
 
 
287
  with gr.Blocks() as demo:
288
- gr.Markdown("# Basic Agent Evaluation Runner (smolagents)")
 
289
  gr.Markdown(
290
  """
291
- **Instructions:**
292
- 1. This space uses a simple agent built with `smolagents` + `InferenceClientModel`.
293
- 2. Log in to your Hugging Face account using the button below.
294
- 3. Click **'Run Evaluation & Submit All Answers'** to fetch questions,
295
- run the agent, submit answers, and see your score.
296
- ---
297
- **Notes:**
298
- - The correction on the server uses EXACT MATCH, so the agent is prompted
299
- to output only the final answer (sem 'FINAL ANSWER', sem explicações).
300
- - This template is intentionally simples; você pode adicionar tools,
301
- melhorar o prompt, etc., se quiser subir seu score.
302
  """
303
  )
304
 
@@ -319,6 +405,7 @@ with gr.Blocks() as demo:
319
  outputs=[status_output, results_table],
320
  )
321
 
 
322
  if __name__ == "__main__":
323
  print("\n" + "-" * 30 + " App Starting " + "-" * 30)
324
  space_host_startup = os.getenv("SPACE_HOST")
@@ -340,5 +427,6 @@ if __name__ == "__main__":
340
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
341
 
342
  print("-" * (60 + len(" App Starting ")) + "\n")
343
- print("Launching Gradio Interface for Basic Agent Evaluation...")
 
344
  demo.launch(debug=True, share=False)
 
1
  import os
2
  import re
3
+ import io
4
  import requests
5
  import pandas as pd
6
  import gradio as gr
7
 
8
  from huggingface_hub import InferenceClient
9
+ from duckduckgo_search import DDGS
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
17
  # =========================================================
18
  def clean_answer(text: str) -> str:
19
  """
20
+ Limpa a resposta do modelo para bater com EXACT MATCH:
21
  - remove quebras de linha
22
  - remove 'final answer', 'answer:', etc
23
  - remove aspas externas
24
  - normaliza espaços
 
25
  """
26
  if not text:
27
  return ""
 
39
 
40
  text = text.replace("\n", " ").strip()
41
 
42
+ if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
43
+ text = text[1:-1].strip()
44
+ if len(text) >= 2 and text.startswith("'") and text.endswith("'"):
45
  text = text[1:-1].strip()
46
 
47
  text = re.sub(r"\s+", " ", text)
 
48
  return text.strip()
49
 
50
 
51
  # =========================================================
52
+ # Tools auxiliares (search + arquivo)
53
  # =========================================================
54
def web_search(query: str, max_results: int = 6) -> str:
    """Run a DuckDuckGo text search and return formatted result snippets.

    Each hit is rendered as a "Title / Snippet / URL" entry; entries are
    joined with blank lines and the combined text is capped at 4000
    characters so it does not blow up the model's context window.

    Args:
        query: Free-text search query.
        max_results: Maximum number of hits to request.

    Returns:
        The formatted snippet text, or an empty string if the search
        fails for any reason (best-effort tool).
    """
    try:
        with DDGS() as session:
            formatted = [
                f"Title: {hit.get('title') or ''}\nSnippet: {hit.get('body') or ''}\nURL: {hit.get('href') or ''}"
                for hit in session.text(query, max_results=max_results)
            ]
        # Truncate so the snippets cannot dominate the prompt.
        return "\n\n".join(formatted)[:4000]
    except Exception as e:
        print(f"[SEARCH ERROR] {e}")
        return ""
71
+
72
+
73
def get_file_context(item: dict) -> str | None:
    """Download and extract text from a question's attached file, if any.

    The question JSON may reference an attachment under one of several
    keys ('file_url', 'file', 'attachment_url', 'attachment'). XLSX
    attachments are parsed with pandas and returned as CSV text; any
    other attachment is returned as raw text. Output is truncated to
    4000 characters.

    Args:
        item: One question record from the scoring API.

    Returns:
        Extracted text, or None when there is no attachment or any
        download/parse step fails (best-effort tool).
    """
    url = None
    for key in ("file_url", "file", "attachment_url", "attachment"):
        url = item.get(key)
        if url:
            break

    if not url:
        return None

    print(f"Trying to download attachment for task {item.get('task_id')} from: {url}")

    try:
        resp = requests.get(url, timeout=20)
        resp.raise_for_status()

        content_type = resp.headers.get("content-type", "")

        # Spreadsheet attachment: parse via pandas and hand back a CSV preview.
        if url.endswith(".xlsx") or "spreadsheetml.sheet" in content_type:
            try:
                frame = pd.read_excel(io.BytesIO(resp.content))
                return frame.to_csv(index=False)[:4000]
            except Exception as e:
                print(f"[FILE XLSX PARSE ERROR] {e}")
                return None

        # Anything else (CSV, plain text, ...): return the decoded body.
        try:
            return resp.text[:4000]
        except Exception as e:
            print(f"[FILE TEXT PARSE ERROR] {e}")
            return None

    except Exception as e:
        print(f"[FILE DOWNLOAD ERROR] {e}")
        return None
121
 
122
 
123
  # =========================================================
124
+ # Basic Agent Definition – sem smolagents, usando só InferenceClient
125
  # =========================================================
 
126
  class BasicAgent:
127
  """
128
+ Agente que:
129
+ - usa DuckDuckGo para buscar contexto
130
+ - tenta ler arquivo anexo (se o JSON tiver file_url)
131
+ - chama Qwen via chat_completion
132
+ - devolve apenas a resposta final (EXACT MATCH friendly)
133
  """
134
 
135
  def __init__(self):
136
+ print("Initializing GAIA agent with InferenceClient + DuckDuckGo...")
137
 
138
  hf_token = os.getenv("HF_TOKEN")
139
  if not hf_token:
140
  raise ValueError(
141
+ "HF_TOKEN not found! Configure um Secret chamado HF_TOKEN em Settings → Variables."
142
  )
143
 
144
+ # Modelo conversacional (suporta chat_completion)
145
  self.client = InferenceClient(
146
+ model="Qwen/Qwen2.5-72B-Instruct",
147
  token=hf_token,
148
  )
149
 
150
  self.system_instructions = (
151
  "You are solving GAIA benchmark questions.\n"
152
+ "You may receive web search snippets and/or file contents.\n"
153
+ "Use them to answer accurately.\n"
154
+ "RULES:\n"
155
  "- Answer ONLY with the final answer.\n"
156
+ "- No explanations, no reasoning steps, no justification.\n"
157
  "- Do NOT write 'Final answer', 'Answer:', etc.\n"
158
  "- If the answer is a number, output just the number.\n"
159
+ "- Your output will be compared using EXACT MATCH.\n"
160
  )
161
 
162
+ def __call__(self, question: str, file_context: str | None = None) -> str:
163
  print(f"\n=== NEW QUESTION ===\n{question}\n")
164
 
165
+ # 1) Busca na web
166
+ search_context = web_search(question)
167
+ print(f"[SEARCH LENGTH] {len(search_context)} chars")
168
+
169
+ # 2) Constrói contexto adicional
170
+ extra_parts = []
171
+ if search_context:
172
+ extra_parts.append("Web search results:\n" + search_context)
173
+ if file_context:
174
+ extra_parts.append("Relevant file content:\n" + file_context)
175
+
176
+ extra_context = "\n\n".join(extra_parts)
177
+ if len(extra_context) > 6000:
178
+ extra_context = extra_context[:6000]
179
+
180
+ user_content = question
181
+ if extra_context:
182
+ user_content += (
183
+ "\n\nHere is some external context (web and/or file):\n"
184
+ + extra_context
185
+ + "\n\nUsing ONLY the necessary information above, "
186
+ "answer the question. Remember: reply ONLY with the final answer."
187
+ )
188
+ else:
189
+ user_content += (
190
+ "\n\nAnswer the question using your knowledge. "
191
+ "Remember: reply ONLY with the final answer."
192
+ )
193
+
194
  messages = [
195
  {"role": "system", "content": self.system_instructions},
196
+ {"role": "user", "content": user_content},
 
 
 
 
 
 
197
  ]
198
 
199
  try:
200
  completion = self.client.chat_completion(
201
  messages=messages,
202
+ max_tokens=96,
203
  temperature=0.1,
204
  top_p=0.9,
205
  )
206
 
 
207
  choice = completion.choices[0]
208
+ msg = choice.message
209
+ if isinstance(msg, dict):
210
+ raw = msg.get("content", "")
211
  else:
212
+ raw = getattr(msg, "content", "")
213
 
214
  print("RAW MODEL OUTPUT:", repr(raw))
215
  final = clean_answer(raw)
 
220
  print("ERROR calling InferenceClient.chat_completion:", e)
221
  return ""
222
 
223
+
224
  # =========================================================
225
+ # Runner + submit (quase igual ao template original)
226
  # =========================================================
227
  def run_and_submit_all(profile: gr.OAuthProfile | None):
228
  """
229
+ Busca todas as questões, roda o agente em cada uma,
230
+ submete as respostas e mostra o resultado.
231
  """
232
+ space_id = os.getenv("SPACE_ID")
 
233
 
234
  if profile:
235
  username = f"{profile.username}"
 
242
  questions_url = f"{api_url}/questions"
243
  submit_url = f"{api_url}/submit"
244
 
245
+ # 1. Instancia o agente
246
  try:
247
  agent = BasicAgent()
248
  except Exception as e:
249
  print(f"Error instantiating agent: {e}")
250
  return f"Error initializing agent: {e}", None
251
 
 
252
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
253
  print(f"Agent code URL: {agent_code}")
254
 
255
+ # 2. Busca perguntas
256
  print(f"Fetching questions from: {questions_url}")
257
  try:
258
+ response = requests.get(questions_url, timeout=120)
259
  response.raise_for_status()
260
  questions_data = response.json()
261
  if not questions_data:
 
273
  print(f"An unexpected error occurred fetching questions: {e}")
274
  return f"An unexpected error occurred fetching questions: {e}", None
275
 
276
+ # 3. Roda o agente
277
  results_log = []
278
  answers_payload = []
279
  print(f"Running agent on {len(questions_data)} questions...")
280
+
281
  for item in questions_data:
282
  task_id = item.get("task_id")
283
  question_text = item.get("question")
284
  if not task_id or question_text is None:
285
  print(f"Skipping item with missing task_id or question: {item}")
286
  continue
287
+
288
  try:
289
+ file_context = get_file_context(item)
290
+ submitted_answer = agent(question_text, file_context=file_context)
291
+
292
  answers_payload.append(
293
  {"task_id": task_id, "submitted_answer": submitted_answer}
294
  )
 
313
  print("Agent did not produce any answers to submit.")
314
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
315
 
316
+ # 4. Monta submissão
317
  submission_data = {
318
  "username": username.strip(),
319
  "agent_code": agent_code,
 
324
  )
325
  print(status_update)
326
 
327
+ # 5. Submete
328
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
329
  try:
330
+ response = requests.post(submit_url, json=submission_data, timeout=120)
331
  response.raise_for_status()
332
  result_data = response.json()
333
  final_status = (
 
368
  return status_message, results_df
369
 
370
 
371
+ # =========================================================
372
+ # Interface Gradio (igual ao template, com texto atualizado)
373
+ # =========================================================
374
  with gr.Blocks() as demo:
375
+ gr.Markdown("# GAIA Agent Evaluation Runner (Custom Qwen + DuckDuckGo)")
376
+
377
  gr.Markdown(
378
  """
379
+ **How to use:**
380
+ 1. Log in to your Hugging Face account using the button below.
381
+ 2. Click **'Run Evaluation & Submit All Answers'**.
382
+ 3. The agent will:
383
+ - fetch all questions,
384
+ - optionally download attached files (if any),
385
+ - perform web search,
386
+ - answer each question with ONLY the final answer (EXACT MATCH friendly),
387
+ - submit the answers to the scoring API.
 
 
388
  """
389
  )
390
 
 
405
  outputs=[status_output, results_table],
406
  )
407
 
408
+
409
  if __name__ == "__main__":
410
  print("\n" + "-" * 30 + " App Starting " + "-" * 30)
411
  space_host_startup = os.getenv("SPACE_HOST")
 
427
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
428
 
429
  print("-" * (60 + len(" App Starting ")) + "\n")
430
+
431
+ print("Launching Gradio Interface for GAIA Agent Evaluation...")
432
  demo.launch(debug=True, share=False)