wahibtim committed on
Commit
f05a776
·
verified ·
1 Parent(s): 341e0dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -54
app.py CHANGED
@@ -2,7 +2,8 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from smolagents import CodeAgent, HfApiModel, tool
 
6
  from PIL import Image
7
  import io
8
 
@@ -11,7 +12,7 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
  # ====================== TOOLS ======================
12
  @tool
13
  def web_search(query: str) -> str:
14
- """Search the web for information."""
15
  try:
16
  from duckduckgo_search import DDGS
17
  with DDGS() as ddgs:
@@ -20,73 +21,55 @@ def web_search(query: str) -> str:
20
  except:
21
  return "Search failed."
22
 
23
- @tool
24
- def calculate(expression: str) -> str:
25
- """Calculate simple math."""
26
- try:
27
- import math
28
- return str(eval(expression, {"__builtins__": {}}, {"math": math}))
29
- except:
30
- return "Calc failed."
31
-
32
  @tool
33
  def download_file(task_id: str) -> str:
34
- """Download the file (image, csv, audio, etc.) attached to a question."""
35
  url = f"{DEFAULT_API_URL}/files/{task_id}"
36
  try:
37
  r = requests.get(url, timeout=20)
38
  r.raise_for_status()
39
- content_type = r.headers.get("content-type", "")
40
-
41
- if "image" in content_type:
42
  img = Image.open(io.BytesIO(r.content))
43
- return f"Downloaded image: {img.size} {img.format}"
44
  else:
45
- text = r.text[:1500]
46
- return f"Downloaded file content:\n{text}"
47
- except Exception as e:
48
- return f"File download failed: {str(e)}"
49
-
50
 
51
  # ====================== AGENT ======================
52
  class BasicAgent:
53
  def __init__(self):
54
  self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
55
-
56
- self.agent = CodeAgent(
57
- model=self.model,
58
- tools=[web_search, calculate, download_file],
59
- add_base_tools=True,
60
- verbosity_level=0,
61
- max_steps=10,
62
- planning_interval=3
63
- )
64
 
65
  def __call__(self, question: str) -> str:
 
 
 
 
 
 
 
 
66
  try:
67
- output = self.agent.run(question)
68
  answer = str(output).strip()
69
-
70
- # Force clean final answer (this fixes 0%)
71
  if "Final Answer:" in answer:
72
  answer = answer.split("Final Answer:")[-1].strip()
73
- elif "Answer:" in answer:
74
  answer = answer.split("Answer:")[-1].strip()
75
-
76
- # Take last line if too long
77
- if len(answer) > 400:
78
- answer = answer.split("\n")[-1].strip()
79
-
80
- return answer[:700].strip()
81
-
82
  except Exception as e:
83
- return f"Error: {str(e)[:150]}"
84
 
85
 
86
- # ====================== SUBMISSION ======================
87
  def run_and_submit_all(profile: gr.OAuthProfile | None):
88
  if not profile:
89
- return "Please login first.", None
90
 
91
  username = profile.username.strip()
92
  api_url = DEFAULT_API_URL
@@ -94,16 +77,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
94
  submit_url = f"{api_url}/submit"
95
 
96
  agent = BasicAgent()
97
-
98
  space_id = os.getenv("SPACE_ID")
99
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
100
 
101
- response = requests.get(questions_url, timeout=20)
102
- questions_data = response.json()
 
103
 
104
  answers_payload = []
105
  results_log = []
106
 
 
 
107
  for item in questions_data:
108
  task_id = item.get("task_id")
109
  question = item.get("question")
@@ -112,16 +97,23 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
112
 
113
  answer = agent(question)
114
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
115
- results_log.append({"Task ID": task_id, "Question": question[:100]+"...", "Answer": answer[:150]+"..."})
 
 
 
 
116
 
 
 
 
117
  submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
118
 
119
  try:
120
- response = requests.post(submit_url, json=submission_data, timeout=90)
121
- response.raise_for_status()
122
- result = response.json()
123
  score = result.get("score", 0)
124
- status = f"✅ Done!\nScore: {score}% ({result.get('correct_count',0)}/20)\n{result.get('message','')}"
125
  return status, pd.DataFrame(results_log)
126
  except Exception as e:
127
  return f"Submission failed: {str(e)}", pd.DataFrame(results_log)
@@ -129,8 +121,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
129
 
130
  # ====================== UI ======================
131
  with gr.Blocks() as demo:
132
- gr.Markdown("# Unit 4 - Final GAIA Agent (with file support)")
133
- gr.Markdown("Login → Click button (takes 8-15 min)")
134
 
135
  gr.LoginButton()
136
  btn = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary", size="large")
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ import time
6
+ from smolagents import HfApiModel, tool
7
  from PIL import Image
8
  import io
9
 
 
12
  # ====================== TOOLS ======================
13
  @tool
14
  def web_search(query: str) -> str:
15
+ """Search the web."""
16
  try:
17
  from duckduckgo_search import DDGS
18
  with DDGS() as ddgs:
 
21
  except:
22
  return "Search failed."
23
 
 
 
 
 
 
 
 
 
 
24
  @tool
25
  def download_file(task_id: str) -> str:
26
+ """Download attached file (image, csv, audio...) for the task."""
27
  url = f"{DEFAULT_API_URL}/files/{task_id}"
28
  try:
29
  r = requests.get(url, timeout=20)
30
  r.raise_for_status()
31
+ if "image" in r.headers.get("content-type", ""):
 
 
32
  img = Image.open(io.BytesIO(r.content))
33
+ return f"Image downloaded: size {img.size}"
34
  else:
35
+ return f"File downloaded: {len(r.content)} bytes"
36
+ except:
37
+ return "File download failed."
 
 
38
 
39
  # ====================== AGENT ======================
40
  class BasicAgent:
41
  def __init__(self):
42
  self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 
 
 
 
 
 
 
 
 
43
 
44
  def __call__(self, question: str) -> str:
45
+ # VERY STRICT PROMPT – this is what fixes the 0%
46
+ prompt = f"""You are answering a GAIA benchmark question.
47
+ Answer with ONLY the final answer. No explanation, no reasoning, no "Final Answer", no quotes, no extra text.
48
+
49
+ Question: {question}
50
+
51
+ Final Answer:"""
52
+
53
  try:
54
+ output = self.model.generate(prompt, max_new_tokens=150)
55
  answer = str(output).strip()
56
+
57
+ # Extra cleaning
58
  if "Final Answer:" in answer:
59
  answer = answer.split("Final Answer:")[-1].strip()
60
+ if "Answer:" in answer:
61
  answer = answer.split("Answer:")[-1].strip()
62
+
63
+ return answer[:400].strip() # keep it short and clean
64
+
 
 
 
 
65
  except Exception as e:
66
+ return f"Error: {str(e)[:100]}"
67
 
68
 
69
+ # ====================== MAIN FUNCTION ======================
70
  def run_and_submit_all(profile: gr.OAuthProfile | None):
71
  if not profile:
72
+ return "Please login with Hugging Face first.", None
73
 
74
  username = profile.username.strip()
75
  api_url = DEFAULT_API_URL
 
77
  submit_url = f"{api_url}/submit"
78
 
79
  agent = BasicAgent()
 
80
  space_id = os.getenv("SPACE_ID")
81
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
82
 
83
+ # Fetch questions
84
+ resp = requests.get(questions_url, timeout=20)
85
+ questions_data = resp.json()
86
 
87
  answers_payload = []
88
  results_log = []
89
 
90
+ print(f"Running on {len(questions_data)} questions...")
91
+
92
  for item in questions_data:
93
  task_id = item.get("task_id")
94
  question = item.get("question")
 
97
 
98
  answer = agent(question)
99
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
100
+ results_log.append({
101
+ "Task ID": task_id,
102
+ "Question": question[:100] + "...",
103
+ "Answer": answer[:150] + "..." if len(answer) > 150 else answer
104
+ })
105
 
106
+ time.sleep(35) # ← IMPORTANT: avoids rate limit
107
+
108
+ # Submit
109
  submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
110
 
111
  try:
112
+ r = requests.post(submit_url, json=submission_data, timeout=120)
113
+ r.raise_for_status()
114
+ result = r.json()
115
  score = result.get("score", 0)
116
+ status = f"✅ SUBMITTED!\nScore: {score}% ({result.get('correct_count',0)}/20)\n{result.get('message','')}"
117
  return status, pd.DataFrame(results_log)
118
  except Exception as e:
119
  return f"Submission failed: {str(e)}", pd.DataFrame(results_log)
 
121
 
122
  # ====================== UI ======================
123
  with gr.Blocks() as demo:
124
+ gr.Markdown("# Unit 4 – Final GAIA Agent (Clean Version)")
125
+ gr.Markdown("**Login → Click the button** (takes ~12-18 minutes)")
126
 
127
  gr.LoginButton()
128
  btn = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary", size="large")