emanuelediluzio committed on
Commit
69cde85
·
verified ·
1 Parent(s): 956053a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +219 -240
app.py CHANGED
@@ -1,155 +1,145 @@
 
 
 
 
 
 
1
  import os
2
  import re
3
  import io
4
- import json
5
  import time
6
  import traceback
7
  import gradio as gr
8
  import requests
9
  import pandas as pd
10
  from bs4 import BeautifulSoup
11
- from smolagents import (
12
- CodeAgent,
13
- DuckDuckGoSearchTool,
14
- LiteLLMModel,
15
- tool,
16
- )
17
 
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
- GROQ_MODEL = "groq/llama-3.3-70b-versatile"
20
 
21
 
22
- @tool
23
- def visit_webpage(url: str) -> str:
24
- """Visits a webpage and extracts its main clean text content.
25
- Use this to read Wikipedia pages, news articles, or any online resource.
26
- Args:
27
- url: The full URL of the webpage to visit.
28
- """
29
  try:
30
  headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
31
- response = requests.get(url, headers=headers, timeout=15)
32
- response.raise_for_status()
33
- soup = BeautifulSoup(response.text, "html.parser")
34
  for el in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
35
  el.extract()
36
- lines = [l.strip() for l in soup.get_text(separator="\n", strip=True).splitlines() if l.strip()]
37
- return "\n".join(lines)[:10000]
38
  except Exception as e:
39
- return f"Error: {str(e)}"
40
 
41
 
42
- @tool
43
- def get_youtube_transcript(video_url: str) -> str:
44
- """Fetches the transcript/captions of a YouTube video.
45
- Use this whenever the question refers to a YouTube video URL.
46
- Args:
47
- video_url: The full YouTube video URL (or just the video ID).
48
- """
49
  try:
50
  from youtube_transcript_api import YouTubeTranscriptApi
51
- match = re.search(r"(?:v=|youtu\.be/|embed/)([^&\n?#]+)", video_url)
52
- video_id = match.group(1) if match else video_url.strip()
53
  try:
54
- entries = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
55
  except Exception:
56
- try:
57
- entries = YouTubeTranscriptApi.get_transcript(video_id)
58
- except Exception:
59
- tl = YouTubeTranscriptApi.list_transcripts(video_id)
60
- entries = tl.find_generated_transcript(["en", "it", "fr", "de", "es"]).fetch()
61
- return " ".join([e["text"] for e in entries])[:8000]
62
  except Exception as e:
63
- return f"Transcript error: {str(e)}"
64
 
65
 
66
- @tool
67
- def download_task_file(task_id: str) -> str:
68
- """Downloads and reads the file attached to a GAIA task.
69
- Handles text, CSV, JSON, PDF, Excel (.xlsx/.xls), and Python files.
70
- Always try this tool first if the question might reference an attached file.
71
- Args:
72
- task_id: The task_id string from the GAIA question.
73
- """
74
  try:
75
- file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
76
- response = requests.get(file_url, timeout=20)
77
- if response.status_code == 404:
78
- return "No file attached to this task."
79
- response.raise_for_status()
80
- ct = response.headers.get("Content-Type", "")
81
- cd = response.headers.get("Content-Disposition", "")
82
- filename = ""
83
- if "filename=" in cd:
84
- filename = cd.split("filename=")[-1].strip('" ')
85
  ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
86
 
 
87
  if any(t in ct for t in ["text", "json", "csv"]) or ext in ["txt", "csv", "json", "py", "md"]:
88
- text = response.text
89
  if ext == "csv" or "csv" in ct:
90
  try:
91
- df = pd.read_csv(io.StringIO(text))
92
- return f"CSV with {len(df)} rows, columns: {list(df.columns)}\n\n{df.to_string()}"[:8000]
93
  except Exception:
94
  pass
95
- return text[:8000]
96
 
 
97
  if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
98
  try:
99
- df = pd.read_excel(io.BytesIO(response.content), engine="openpyxl")
100
- return f"Excel with {len(df)} rows, columns: {list(df.columns)}\n\n{df.to_string()}"[:8000]
101
- except Exception as e:
102
- return f"Excel read error: {e}"
103
 
 
104
  if "pdf" in ct or ext == "pdf":
105
  try:
106
  import PyPDF2
107
- reader = PyPDF2.PdfReader(io.BytesIO(response.content))
108
- return "\n".join([p.extract_text() or "" for p in reader.pages])[:8000]
109
- except Exception as e:
110
- return f"PDF read error: {e}"
111
-
112
- if "audio" in ct or ext in ["mp3", "wav", "m4a", "ogg"]:
113
- return f"Audio file ({ct}, {len(response.content)} bytes)."
114
 
115
- if "image" in ct or ext in ["png", "jpg", "jpeg", "gif", "webp"]:
116
- return f"Image file ({ct}, {len(response.content)} bytes)."
 
 
 
117
 
118
  try:
119
- return response.content.decode("utf-8")[:8000]
120
  except Exception:
121
- return f"Binary file ({ct}, {len(response.content)} bytes)."
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  except Exception as e:
123
- return f"File error: {str(e)}"
 
124
 
 
 
 
125
 
126
  def preprocess_question(question: str) -> str:
127
  stripped = question.strip()
128
- reversed_q = stripped[::-1]
129
- keywords = ["answer", "what", "who", "how", "find", "list", "which", "where", "when", "the"]
130
- if sum(1 for w in keywords if w in reversed_q.lower()) > sum(1 for w in keywords if w in stripped.lower()) and len(stripped) > 20:
131
- return reversed_q
132
  return question
133
 
134
 
135
  def clean_answer(raw: str) -> str:
136
  answer = str(raw).strip()
137
- lines = [l.strip() for l in answer.split("\n") if l.strip()]
138
- if lines:
139
- answer = lines[0]
140
- prefixes = [
141
- "the answer is:", "the answer is", "final answer:", "final answer is:",
142
- "final answer is", "answer:", "answer is:", "answer is",
143
- "the result is:", "the result is", "the correct answer is:",
144
- "the correct answer is", "based on", "according to",
145
- "here is the answer:", "sure,", "sure!",
146
- ]
147
- lower = answer.lower()
148
- for prefix in prefixes:
149
- if lower.startswith(prefix):
150
  answer = answer[len(prefix):].strip()
151
- if answer and answer[0] in '"\'':
152
- answer = answer[1:]
153
  break
154
  if answer.endswith(".") and not re.search(r"\d\.$", answer):
155
  answer = answer[:-1].strip()
@@ -157,143 +147,120 @@ def clean_answer(raw: str) -> str:
157
  return answer
158
 
159
 
160
- def prefetch_file(task_id: str) -> str:
161
- if not task_id:
162
- return ""
163
- try:
164
- resp = requests.get(f"https://agents-course-unit4-scoring.hf.space/files/{task_id}", timeout=10)
165
- if resp.status_code != 200:
166
- return ""
167
- ct = resp.headers.get("Content-Type", "")
168
- cd = resp.headers.get("Content-Disposition", "")
169
- filename = cd.split("filename=")[-1].strip('" ') if "filename=" in cd else ""
170
- ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
171
-
172
- if any(t in ct for t in ["text", "json", "csv"]) or ext in ["txt", "csv", "json", "py"]:
173
- if ext == "csv" or "csv" in ct:
174
- try:
175
- df = pd.read_csv(io.StringIO(resp.text))
176
- return f"CSV: {len(df)} rows, cols={list(df.columns)}\n{df.to_string()}"[:5000]
177
- except Exception:
178
- pass
179
- return resp.text[:5000]
180
- if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
181
- try:
182
- df = pd.read_excel(io.BytesIO(resp.content), engine="openpyxl")
183
- return f"Excel: {len(df)} rows, cols={list(df.columns)}\n{df.to_string()}"[:5000]
184
- except Exception:
185
- pass
186
- if "pdf" in ct or ext == "pdf":
187
- try:
188
- import PyPDF2
189
- reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
190
- return "\n".join([p.extract_text() or "" for p in reader.pages])[:5000]
191
- except Exception:
192
- pass
193
- return ""
194
- except Exception:
195
- return ""
196
-
197
-
198
  def is_valid(answer: str) -> bool:
199
  if not answer:
200
  return False
201
- invalid = {"i don't know", "unknown", "n/a", "none", "error", "i cannot", "i can't", "not available"}
202
- return answer.lower().strip() not in invalid
203
 
204
 
205
- def direct_groq(question: str, file_context: str = "") -> str:
206
- groq_key = os.getenv("GROQ_API_KEY", "")
207
- if not groq_key:
208
- return "I don't know"
209
- extra = f"\n\nAttached file:\n{file_context[:2500]}\n" if file_context else ""
210
- prompt = f"""Answer with ONLY the final answer. No explanation. No preamble.
211
- - Numbers: just digits. Names: just the name. Lists: comma-separated.
212
- {extra}
213
- Question: {question}
214
- Answer:"""
215
-
216
- for model in ["llama-3.3-70b-versatile", "llama-3.1-8b-instant"]:
217
  try:
218
  resp = requests.post(
219
  "https://api.groq.com/openai/v1/chat/completions",
220
  headers={"Authorization": f"Bearer {groq_key}", "Content-Type": "application/json"},
221
- json={"model": model, "messages": [{"role": "user", "content": prompt}], "temperature": 0.1, "max_tokens": 150},
222
- timeout=20,
 
 
 
 
 
 
 
 
223
  )
224
  if resp.status_code == 200:
225
- raw = resp.json()["choices"][0]["message"]["content"].strip()
226
- answer = clean_answer(raw)
227
- if is_valid(answer):
228
- return answer
229
  elif resp.status_code == 429:
230
- time.sleep(10)
231
- except Exception:
232
- pass
233
- return "I don't know"
234
-
235
-
236
- class GaiaAgent:
237
- def __init__(self):
238
- print("πŸš€ Init GaiaAgent...")
239
- groq_key = os.getenv("GROQ_API_KEY", "")
240
- if not groq_key:
241
- raise ValueError("❌ GROQ_API_KEY mancante! Vai su console.groq.com")
242
-
243
- self.model = LiteLLMModel(model_id=GROQ_MODEL, api_key=groq_key, temperature=0.1, max_tokens=1024)
244
- self.agent = CodeAgent(
245
- tools=[DuckDuckGoSearchTool(), visit_webpage, get_youtube_transcript, download_task_file],
246
- model=self.model,
247
- max_steps=5, # RIDOTTO da 8 a 5 per velocitΓ 
248
- additional_authorized_imports=[
249
- "requests", "bs4", "json", "time", "math", "datetime",
250
- "pandas", "numpy", "re", "csv", "urllib", "collections",
251
- "itertools", "string", "unicodedata", "statistics",
252
- ],
253
- )
254
- print("βœ… Agent pronto!")
255
-
256
- def __call__(self, question: str, task_id: str = "") -> str:
257
- print(f"\n[Q]: {question[:120]}")
258
- processed = preprocess_question(question)
259
- file_context = prefetch_file(task_id)
260
-
261
- file_hint = f'\nTask has task_id="{task_id}". Call download_task_file("{task_id}") for attached files.' if task_id else ""
262
- extra = f"\n\n--- FILE ---\n{file_context[:3000]}\n---\n" if file_context else ""
263
-
264
- prompt = f"""You are solving GAIA benchmark questions. Find the EXACT answer.
265
-
266
- STRATEGY:
267
- 1. YouTube URL β†’ get_youtube_transcript(url)
268
- 2. Any URL β†’ visit_webpage(url)
269
- 3. Attached file β†’ download_task_file(task_id)
270
- 4. Factual β†’ DuckDuckGoSearchTool + visit_webpage
271
- 5. Math β†’ Python code
272
- 6. Reversed text β†’ text[::-1]
273
-
274
- OUTPUT ONLY the bare answer. No "The answer is". No explanation.
275
- Numbers: just digits. Names: just the name. Lists: comma-separated.
276
- {file_hint}{extra}
277
- Question: {processed}"""
278
-
279
- try:
280
- time.sleep(1) # RIDOTTO da 3 a 1
281
- raw = self.agent.run(prompt)
282
- answer = clean_answer(str(raw))
283
- if is_valid(answer):
284
- print(f" βœ… {answer}")
285
- return answer
286
  except Exception as e:
287
- err = str(e)
288
- print(f" ⚠️ Agent error: {err[:150]}")
289
- if "429" in err:
290
- time.sleep(10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
- # Fallback
293
- answer = direct_groq(processed, file_context)
294
- print(f" πŸ”„ Fallback: {answer}")
295
- return answer
296
 
 
 
 
297
 
298
  def run_and_submit_all(profile: gr.OAuthProfile | None):
299
  space_id = os.getenv("SPACE_ID")
@@ -301,47 +268,54 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
301
  return "Fai il Login con Hugging Face.", None
302
 
303
  username = profile.username
304
- print(f"πŸ‘€ {username}")
 
 
305
 
306
- try:
307
- agent = GaiaAgent()
308
- except Exception as e:
309
- return f"Errore init: {e}", None
310
 
311
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
312
 
313
  try:
314
  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
315
  resp.raise_for_status()
316
- questions_data = resp.json()
317
  except Exception as e:
318
  return f"Errore domande: {e}", None
319
 
320
- results_log = []
321
- answers_payload = []
 
 
322
 
323
- for i, item in enumerate(questions_data):
324
  task_id = item.get("task_id", "")
325
- question_text = item.get("question")
326
- if not task_id or question_text is None:
327
  continue
328
- print(f"\n[{i+1}/{len(questions_data)}] ──────────")
 
329
  try:
330
- answer = agent(question_text, task_id=task_id)
331
  except Exception as e:
332
- answer = "I don't know"
333
  print(f" ERROR: {e}")
334
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
335
- results_log.append({"Task ID": task_id, "Question": question_text[:100], "Answer": answer})
336
 
337
- if not answers_payload:
338
- return "Nessuna risposta.", pd.DataFrame(results_log)
 
 
 
339
 
 
 
 
 
340
  try:
341
  resp = requests.post(
342
  f"{DEFAULT_API_URL}/submit",
343
- json={"username": username, "agent_code": agent_code, "answers": answers_payload},
344
- timeout=120,
345
  )
346
  resp.raise_for_status()
347
  r = resp.json()
@@ -350,13 +324,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
350
  f"πŸ† {r.get('score', 'N/A')}% ({r.get('correct_count', '?')}/{r.get('total_attempted', '?')})\n"
351
  f"πŸ“ {r.get('message', '')}"
352
  )
353
- return status, pd.DataFrame(results_log)
 
354
  except Exception as e:
355
- return f"❌ Invio fallito: {e}", pd.DataFrame(results_log)
 
356
 
 
 
 
357
 
358
  with gr.Blocks() as demo:
359
- gr.Markdown("# πŸš€ GAIA Agent β€” Final Assignment\nPowered by **Groq** (Llama 3.3 70B)")
360
  gr.LoginButton()
361
  run_button = gr.Button("πŸ”₯ Avvia Valutazione", variant="primary")
362
  status_output = gr.Textbox(label="Risultato", lines=5, interactive=False)
 
1
+ """
2
+ GAIA Agent — Final Assignment
3
+ Direct Groq API calls, NO smolagents CodeAgent (too slow).
4
+ Manual tool routing: detect URLs, files, etc. and fetch context before asking Groq.
5
+ Target: 6/20 (30%) to pass.
6
+ """
7
  import os
8
  import re
9
  import io
 
10
  import time
11
  import traceback
12
  import gradio as gr
13
  import requests
14
  import pandas as pd
15
  from bs4 import BeautifulSoup
 
 
 
 
 
 
16
 
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
18
 
19
 
20
+ # ==========================================
21
+ # 🔧 TOOLS (plain functions, no smolagents)
22
+ # ==========================================
23
+
24
def fetch_webpage(url: str) -> str:
    """Download a web page and return its visible text.

    Script, style and page-chrome elements (nav, footer, header, aside,
    noscript) are removed, blank lines are dropped, and the result is capped
    at 8000 characters.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The cleaned page text, or a string starting with "Error:" when the
        URL is blank or the request/parsing fails. This function never raises.
    """
    target = url.strip() if url else ""
    if not target:
        return "Error: empty URL"
    try:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
        resp = requests.get(target, headers=headers, timeout=15)
        resp.raise_for_status()
        # requests falls back to ISO-8859-1 for text/* responses that declare
        # no charset, which garbles UTF-8 pages. Trust the header only when it
        # really names a charset; otherwise let BeautifulSoup sniff the bytes.
        declared = "charset" in resp.headers.get("Content-Type", "").lower()
        soup = BeautifulSoup(
            resp.content,
            "html.parser",
            from_encoding=resp.encoding if declared else None,
        )
        for el in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
            el.extract()
        text = soup.get_text("\n", strip=True)
        lines = [line.strip() for line in text.splitlines() if line.strip()]
        return "\n".join(lines)[:8000]
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error: {e}"
36
 
37
 
38
def fetch_youtube_transcript(url: str) -> str:
    """Return the transcript text of a YouTube video, capped at 6000 characters.

    English is tried first; if it is unavailable, the first transcript the
    video offers (any language, manual or generated) is used instead.

    Args:
        url: A YouTube URL (watch, youtu.be, embed, shorts or live form) or a
            bare video id.

    Returns:
        The transcript snippets joined by spaces, or a string starting with
        "Transcript error:" on any failure. This function never raises.
    """
    try:
        match = re.search(r"(?:v=|youtu\.be/|embed/|shorts/|live/)([^&\n?#/]+)", url)
        vid = match.group(1) if match else url.strip()
        if not vid:
            return "Transcript error: empty video URL or ID"

        from youtube_transcript_api import YouTubeTranscriptApi

        # Older releases expose static helpers (get_transcript /
        # list_transcripts); newer ones replaced them with instance methods
        # (fetch / list). Support whichever the installed version provides.
        legacy_get = getattr(YouTubeTranscriptApi, "get_transcript", None)
        if legacy_get is not None:
            try:
                entries = legacy_get(vid, languages=["en"])
            except Exception:
                # Calling get_transcript without languages would just repeat
                # the English-only request, so pick the first listed transcript.
                listing = YouTubeTranscriptApi.list_transcripts(vid)
                entries = next(iter(listing)).fetch()
        else:
            api = YouTubeTranscriptApi()
            try:
                entries = api.fetch(vid, languages=["en"])
            except Exception:
                entries = next(iter(api.list(vid))).fetch()

        # Legacy entries are dicts; the newer API yields objects with `.text`.
        texts = [e["text"] if isinstance(e, dict) else e.text for e in entries]
        return " ".join(texts)[:6000]
    except Exception as e:
        return f"Transcript error: {e}"
50
 
51
 
52
def _filename_from_disposition(header: str) -> str:
    """Extract the file name from a Content-Disposition header value ("" if absent)."""
    match = re.search(
        r"""filename\*?=(?:[\w-]*'[\w-]*')?"?([^";]+)""",
        header,
        flags=re.IGNORECASE,
    )
    return match.group(1).strip() if match else ""


def fetch_task_file(task_id: str) -> str:
    """Download the file attached to a task and return a text rendering of it.

    The file is requested from ``{DEFAULT_API_URL}/files/{task_id}``. Its kind
    is decided from the Content-Type header and the extension of the file name
    in Content-Disposition.

    Args:
        task_id: Identifier of the task whose attachment should be fetched.

    Returns:
        - text / CSV / JSON / source files: the content (CSV is rendered as a
          table with a row/column summary), capped at 6000 characters;
        - Excel and PDF: the extracted content, or a short "could not parse"
          placeholder;
        - audio and image: a one-line description with the byte size;
        - anything else: the bytes decoded as UTF-8, or a "Binary file" note;
        - "" when task_id is empty, the server does not answer 200, or any
          unexpected error occurs. This function never raises.
    """
    if not task_id:
        # Nothing to look up; avoid a pointless request to ".../files/".
        return ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=20)
        if resp.status_code != 200:
            return ""
        ct = resp.headers.get("Content-Type", "")
        ct_lower = ct.lower()
        filename = _filename_from_disposition(resp.headers.get("Content-Disposition", ""))
        ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""

        # Text/CSV/JSON
        if any(t in ct_lower for t in ["text", "json", "csv"]) or ext in ["txt", "csv", "json", "py", "md"]:
            if ext == "csv" or "csv" in ct_lower:
                try:
                    df = pd.read_csv(io.StringIO(resp.text))
                    return f"CSV: {len(df)} rows, columns={list(df.columns)}\n{df.to_string()}"[:6000]
                except Exception:
                    # Not valid CSV after all: fall through to the raw text.
                    pass
            return resp.text[:6000]

        # Excel
        if "spreadsheet" in ct_lower or "excel" in ct_lower or ext in ["xlsx", "xls"]:
            try:
                # No explicit engine: openpyxl cannot read legacy .xls files,
                # so let pandas choose the reader from the file content.
                df = pd.read_excel(io.BytesIO(resp.content))
                return f"Excel: {len(df)} rows, columns={list(df.columns)}\n{df.to_string()}"[:6000]
            except Exception:
                return "Excel file (could not parse)"

        # PDF
        if "pdf" in ct_lower or ext == "pdf":
            try:
                import PyPDF2
                reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
                return "\n".join([p.extract_text() or "" for p in reader.pages])[:6000]
            except Exception:
                return "PDF file (could not parse)"

        # Audio/Image
        if "audio" in ct_lower or ext in ["mp3", "wav"]:
            return f"Audio file ({ext}, {len(resp.content)} bytes)"
        if "image" in ct_lower or ext in ["png", "jpg", "jpeg"]:
            return f"Image file ({ext}, {len(resp.content)} bytes)"

        try:
            return resp.content.decode("utf-8")[:6000]
        except Exception:
            return f"Binary file ({ct})"
    except Exception as e:
        # Best-effort: a missing attachment must not abort the question,
        # but leave a trace in the log instead of failing silently.
        print(f" File fetch error: {e}")
        return ""
101
+
102
+
103
def web_search(query: str) -> str:
    """Run a quick DuckDuckGo text search through the ``ddgs`` package.

    Args:
        query: Free-text search query.

    Returns:
        Up to three results formatted as "title: body" and separated by blank
        lines (capped at 4000 characters); "No results found." when the query
        is blank or nothing matched; or a string starting with "Search error:"
        if the search fails. This function never raises.
    """
    if not query or not query.strip():
        # A blank query cannot match anything; skip the network round trip.
        return "No results found."
    try:
        from ddgs import DDGS
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=3))
        if not results:
            return "No results found."
        return "\n\n".join([f"{r.get('title','')}: {r.get('body','')}" for r in results])[:4000]
    except Exception as e:
        return f"Search error: {e}"
114
+
115
 
116
+ # ==========================================
117
+ # 🧹 UTILITIES
118
+ # ==========================================
119
 
120
  def preprocess_question(question: str) -> str:
121
  stripped = question.strip()
122
+ rev = stripped[::-1]
123
+ kw = ["answer", "what", "who", "how", "find", "list", "which", "where", "when", "the"]
124
+ if sum(1 for w in kw if w in rev.lower()) > sum(1 for w in kw if w in stripped.lower()) and len(stripped) > 20:
125
+ return rev
126
  return question
127
 
128
 
129
  def clean_answer(raw: str) -> str:
130
  answer = str(raw).strip()
131
+ # First non-empty line
132
+ for line in answer.split("\n"):
133
+ line = line.strip()
134
+ if line:
135
+ answer = line
136
+ break
137
+ # Remove prefixes
138
+ for prefix in ["the answer is:", "the answer is", "final answer:", "final answer is",
139
+ "answer:", "answer is", "the result is", "result:", "the correct answer is",
140
+ "based on", "according to", "sure,"]:
141
+ if answer.lower().startswith(prefix):
 
 
142
  answer = answer[len(prefix):].strip()
 
 
143
  break
144
  if answer.endswith(".") and not re.search(r"\d\.$", answer):
145
  answer = answer[:-1].strip()
 
147
  return answer
148
 
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def is_valid(answer: str) -> bool:
151
  if not answer:
152
  return False
153
+ return answer.lower().strip() not in {"i don't know", "unknown", "n/a", "none", "error", "i cannot", "i can't"}
 
154
 
155
 
156
+ # ==========================================
157
+ # 🤖 GROQ DIRECT CALL
158
+ # ==========================================
159
+
160
def ask_groq(system: str, user: str, groq_key: str) -> str:
    """Send one chat-completion request to Groq, retrying transient failures.

    Up to three attempts are made. Rate limiting (HTTP 429) and server errors
    (HTTP 5xx) are retried after a growing pause (5s, then 10s); network
    errors, malformed payloads and empty completions are retried after 3s.
    Any other HTTP status is treated as permanent. No pause is taken after the
    final attempt, since nothing would follow it.

    Args:
        system: Content of the system message.
        user: Content of the user message.
        groq_key: Groq API key, sent as a Bearer token.

    Returns:
        The stripped completion text, or "" when the key is empty or no
        attempt produced a completion. This function never raises.
    """
    if not groq_key:
        # Without a key every attempt would be rejected; fail fast.
        return ""

    max_attempts = 3
    for attempt in range(max_attempts):
        last_attempt = attempt == max_attempts - 1
        try:
            resp = requests.post(
                "https://api.groq.com/openai/v1/chat/completions",
                headers={"Authorization": f"Bearer {groq_key}", "Content-Type": "application/json"},
                json={
                    "model": "llama-3.3-70b-versatile",
                    "messages": [
                        {"role": "system", "content": system},
                        {"role": "user", "content": user},
                    ],
                    "temperature": 0.1,
                    "max_tokens": 300,
                },
                timeout=25,
            )
            if resp.status_code == 200:
                content = resp.json()["choices"][0]["message"]["content"]
                if content:
                    return content.strip()
                print(" Groq returned an empty completion")
            elif resp.status_code == 429 or resp.status_code >= 500:
                if last_attempt:
                    break
                wait = 5 * (attempt + 1)
                if resp.status_code == 429:
                    print(f" Rate limited, waiting {wait}s...")
                else:
                    print(f" Groq HTTP {resp.status_code}, retrying in {wait}s...")
                time.sleep(wait)
                continue
            else:
                print(f" Groq HTTP {resp.status_code}: {resp.text[:100]}")
                return ""
        except Exception as e:
            # Network failure or unexpected payload shape: treat as transient.
            print(f" Groq error: {e}")
        if not last_attempt:
            time.sleep(3)
    return ""
191
+
192
+
193
+ # ==========================================
194
+ # 🧠 MAIN LOGIC: gather context, then ask
195
+ # ==========================================
196
+
197
# System message for the Groq chat request: solve_question passes it to
# ask_groq, which sends it as the "system" role message. It tells the model to
# use the supplied context and to reply with the bare answer only (digits for
# numbers, plain names, comma-separated lists, no trailing period).
SYSTEM_PROMPT = """You are an expert AI solving GAIA benchmark questions.
You will be given a question and possibly some context (web search results, file content, webpage text, video transcript).
Use the context to find the EXACT answer.

RULES:
- Output ONLY the final answer. Nothing else.
- No "The answer is", no explanation, no preamble.
- Numbers: just digits (e.g. 42)
- Names: just the name (e.g. Einstein)
- Lists: comma-separated (e.g. cat, dog, bird)
- No period at the end."""
208
+
209
+
210
def _trim_url(url: str) -> str:
    """Strip sentence punctuation that a greedy URL regex swallowed at the end."""
    trailing = ".,;:!?\"'"
    url = url.rstrip(trailing)
    # Drop a closing parenthesis only when it is unmatched, so URLs such as
    # ".../Mercury_(planet)" stay intact.
    while url.endswith(")") and url.count(")") > url.count("("):
        url = url[:-1].rstrip(trailing)
    return url


def solve_question(question: str, task_id: str, groq_key: str) -> str:
    """Gather context for a question, then ask Groq for the final answer.

    Context sources, in order:
      1. the file attached to the task, if any;
      2. the transcript of a YouTube URL found in the question;
      3. otherwise the text of the first URL found in the question;
      4. a web search, when the question has no URL or nothing was gathered.
    Only successful tool output is used as context: tool error strings are
    left out, so a failed page or transcript fetch falls back to the web
    search instead of polluting the prompt.

    Args:
        question: The question text as received from the API.
        task_id: Task identifier, used to look up an attached file.
        groq_key: Groq API key forwarded to ask_groq.

    Returns:
        The cleaned answer, or "I don't know" when no answer was produced.
    """
    print(f"\n[Q]: {question[:120]}")

    processed = preprocess_question(question)
    context_parts = []

    # 1. Always try to fetch task file
    file_content = fetch_task_file(task_id)
    if file_content:
        context_parts.append(f"ATTACHED FILE:\n{file_content}")
        print(f" πŸ“ File: {len(file_content)} chars")

    # 2. If YouTube URL in question
    yt_match = re.search(r'(https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)[^\s]+)', processed)
    if yt_match:
        transcript = fetch_youtube_transcript(_trim_url(yt_match.group(1)))
        if transcript and not transcript.startswith("Transcript error:"):
            context_parts.append(f"YOUTUBE TRANSCRIPT:\n{transcript}")
            print(f" 🎬 YouTube transcript: {len(transcript)} chars")
        else:
            print(f" YouTube transcript unavailable: {transcript[:100]}")

    # 3. If any other URL in question
    url_match = re.search(r'(https?://[^\s]+)', processed)
    if url_match and not yt_match:
        page = fetch_webpage(_trim_url(url_match.group(1)))
        if page and not page.startswith("Error:"):
            context_parts.append(f"WEBPAGE CONTENT:\n{page}")
            print(f" 🌐 Webpage: {len(page)} chars")
        else:
            print(f" Webpage unavailable: {page[:100]}")

    # 4. For questions without URLs/files, or to supplement: web search
    if not context_parts or (not yt_match and not url_match):
        search_results = web_search(processed[:200])  # Use question as search query
        # Match the tool's own failure markers exactly; a plain "error"
        # substring test would also discard genuine results about errors.
        if search_results and not search_results.startswith(("Search error:", "No results found.")):
            context_parts.append(f"WEB SEARCH RESULTS:\n{search_results}")
            print(f" πŸ” Search: {len(search_results)} chars")

    # Build user message
    context = "\n\n".join(context_parts) if context_parts else "No additional context available."
    user_msg = f"CONTEXT:\n{context}\n\nQUESTION: {processed}"

    # Ask Groq
    raw = ask_groq(SYSTEM_PROMPT, user_msg, groq_key)
    if not raw:
        # Retry with simpler prompt (no context, just question)
        raw = ask_groq(SYSTEM_PROMPT, f"QUESTION: {processed}", groq_key)

    # Cleaning can reduce a reply to nothing; never submit an empty answer.
    answer = clean_answer(raw) if raw else ""
    if not answer:
        answer = "I don't know"
    print(f" β†’ {answer}")
    return answer
259
 
 
 
 
 
260
 
261
+ # ==========================================
262
+ # βš™οΈ RUNNER
263
+ # ==========================================
264
 
265
  def run_and_submit_all(profile: gr.OAuthProfile | None):
266
  space_id = os.getenv("SPACE_ID")
 
268
  return "Fai il Login con Hugging Face.", None
269
 
270
  username = profile.username
271
+ groq_key = os.getenv("GROQ_API_KEY", "")
272
+ if not groq_key:
273
+ return "❌ GROQ_API_KEY mancante! Mettila nei Secrets dello Space.", None
274
 
275
+ print(f"\n{'='*50}\nπŸ‘€ {username}\n{'='*50}")
 
 
 
276
 
277
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
278
 
279
  try:
280
  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
281
  resp.raise_for_status()
282
+ questions = resp.json()
283
  except Exception as e:
284
  return f"Errore domande: {e}", None
285
 
286
+ print(f"πŸ“‹ {len(questions)} domande\n")
287
+
288
+ results = []
289
+ answers = []
290
 
291
+ for i, item in enumerate(questions):
292
  task_id = item.get("task_id", "")
293
+ q = item.get("question")
294
+ if not task_id or q is None:
295
  continue
296
+
297
+ print(f"[{i+1}/{len(questions)}] ──────────")
298
  try:
299
+ ans = solve_question(q, task_id, groq_key)
300
  except Exception as e:
301
+ ans = "I don't know"
302
  print(f" ERROR: {e}")
 
 
303
 
304
+ answers.append({"task_id": task_id, "submitted_answer": ans})
305
+ results.append({"Task ID": task_id, "Question": q[:100], "Answer": ans})
306
+
307
+ # Small delay between questions to avoid rate limits
308
+ time.sleep(1)
309
 
310
+ if not answers:
311
+ return "Nessuna risposta.", pd.DataFrame(results)
312
+
313
+ print(f"\nπŸ“€ Invio {len(answers)} risposte...")
314
  try:
315
  resp = requests.post(
316
  f"{DEFAULT_API_URL}/submit",
317
+ json={"username": username, "agent_code": agent_code, "answers": answers},
318
+ timeout=60,
319
  )
320
  resp.raise_for_status()
321
  r = resp.json()
 
324
  f"πŸ† {r.get('score', 'N/A')}% ({r.get('correct_count', '?')}/{r.get('total_attempted', '?')})\n"
325
  f"πŸ“ {r.get('message', '')}"
326
  )
327
+ print(f"\n{status}")
328
+ return status, pd.DataFrame(results)
329
  except Exception as e:
330
+ return f"❌ Invio fallito: {e}", pd.DataFrame(results)
331
+
332
 
333
+ # ==========================================
334
+ # 🖥️ GRADIO
335
+ # ==========================================
336
 
337
  with gr.Blocks() as demo:
338
+ gr.Markdown("# πŸš€ GAIA Agent β€” Final Assignment\nPowered by **Groq** (Llama 3.3 70B) β€” direct & fast")
339
  gr.LoginButton()
340
  run_button = gr.Button("πŸ”₯ Avvia Valutazione", variant="primary")
341
  status_output = gr.Textbox(label="Risultato", lines=5, interactive=False)