wahibtim committed on
Commit
b6de177
·
verified ·
1 Parent(s): 41c61c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -65
app.py CHANGED
@@ -4,8 +4,8 @@ import requests
4
  import pandas as pd
5
  import time
6
  import io
 
7
  from smolagents import HfApiModel, tool, CodeAgent
8
- from PIL import Image
9
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
@@ -21,7 +21,7 @@ def web_search(query: str) -> str:
21
  try:
22
  from duckduckgo_search import DDGS
23
  with DDGS() as ddgs:
24
- results = list(ddgs.text(query, max_results=3))
25
  if not results:
26
  return "No results found."
27
  return "\n".join([f"{r.get('title')}: {r.get('body')}" for r in results])
@@ -31,114 +31,129 @@ def web_search(query: str) -> str:
31
  @tool
32
  def download_and_read_file(task_id: str) -> str:
33
  """
34
- Downloads the file associated with a specific task_id and returns its content as text.
35
  Args:
36
- task_id: The unique ID for the task to fetch the file for.
37
  """
38
  url = f"{DEFAULT_API_URL}/files/{task_id}"
39
  try:
40
- r = requests.get(url, timeout=20)
41
  r.raise_for_status()
42
  content_type = r.headers.get("content-type", "").lower()
43
 
44
- # Logic to handle different file types so the Agent can "read" them
45
- if "text/csv" in content_type or task_id.endswith(".csv"):
46
  df = pd.read_csv(io.BytesIO(r.content))
47
- return f"CSV Content (first 5 rows):\n{df.head().to_string()}"
48
- elif "text/plain" in content_type:
49
- return f"Text File Content:\n{r.text[:2000]}" # Limit size
50
- elif "image" in content_type:
51
- img = Image.open(io.BytesIO(r.content))
52
- return f"Image downloaded. Dimensions: {img.size}. (Note: Use web search if you need to identify contents of specific historical images)."
53
  else:
54
- return f"File downloaded ({len(r.content)} bytes), but format is binary/unsupported for direct text reading."
55
  except Exception as e:
56
- return f"File download failed: {str(e)}"
57
 
58
- # ====================== AGENT LOGIC ======================
59
 
60
  class GaiaAgent:
61
  def __init__(self):
62
- # We use Qwen because it's excellent at writing the code needed for CodeAgent
63
  self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
64
  self.agent = CodeAgent(
65
  tools=[web_search, download_and_read_file],
66
  model=self.model,
67
- add_base_tools=True # Gives the agent access to things like 'line_viewer'
 
68
  )
69
 
 
 
 
 
 
 
 
 
 
 
70
  def __call__(self, question: str, task_id: str) -> str:
71
- # We explicitly tell the agent the Task ID so it can use the download tool
72
- full_query = f"Task ID: {task_id}\nQuestion: {question}\n\nPlease solve this and provide only the final answer."
 
 
 
 
 
 
 
 
73
  try:
74
- # The .run() method starts the ReAct loop
75
- result = self.agent.run(full_query)
76
- return str(result).strip()
77
- except Exception as e:
78
- return f"Error: {str(e)}"
79
 
80
- # ====================== GRADIO INTERFACE & SUBMISSION ======================
81
 
82
  def run_and_submit_all(profile: gr.OAuthProfile | None):
83
  if not profile:
84
- return "Please login with Hugging Face first.", None
85
 
86
  username = profile.username
87
- questions_url = f"{DEFAULT_API_URL}/questions"
88
- submit_url = f"{DEFAULT_API_URL}/submit"
89
-
90
  agent = GaiaAgent()
91
- space_id = os.getenv("SPACE_ID")
92
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
93
-
94
- # 1. Get Questions
95
  try:
96
- resp = requests.get(questions_url, timeout=20)
97
- questions_data = resp.json()
98
  except Exception as e:
99
  return f"Failed to fetch questions: {e}", None
100
 
101
  answers_payload = []
102
  results_log = []
103
-
104
- # 2. Process Questions
105
- for item in questions_data:
106
- task_id = item.get("task_id")
107
- question = item.get("question")
108
 
109
- # Run the agent!
110
- answer = agent(question, task_id)
111
 
112
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
113
- results_log.append({"Task ID": task_id, "Answer": answer})
114
-
115
- # CRITICAL: Sleep to stay within Hugging Face free API limits
116
- time.sleep(35)
117
 
118
- # 3. Submit Results
 
119
  submission_data = {
120
  "username": username,
121
- "agent_code": agent_code,
122
  "answers": answers_payload
123
  }
124
 
125
  try:
126
- r = requests.post(submit_url, json=submission_data, timeout=120)
127
- r.raise_for_status()
128
- result = r.json()
129
- status = f"✅ Score: {result.get('score')}% | Correct: {result.get('correct_count')}/20"
130
- return status, pd.DataFrame(results_log)
 
131
  except Exception as e:
132
- return f"Submission failed: {e}", pd.DataFrame(results_log)
133
-
134
- # UI setup remains the same
135
- with gr.Blocks() as demo:
136
- gr.Markdown("# 🤖 Unit 4 GAIA Agent")
137
- gr.LoginButton()
138
- btn = gr.Button("🚀 Run & Submit", variant="primary")
139
- status = gr.Textbox(label="Status")
140
- table = gr.DataFrame(label="Results")
141
- btn.click(run_and_submit_all, outputs=[status, table])
 
 
 
 
 
 
142
 
143
  if __name__ == "__main__":
144
  demo.launch()
 
4
  import pandas as pd
5
  import time
6
  import io
7
+ import re
8
  from smolagents import HfApiModel, tool, CodeAgent
 
9
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
 
21
  try:
22
  from duckduckgo_search import DDGS
23
  with DDGS() as ddgs:
24
+ results = list(ddgs.text(query, max_results=5)) # Increased results for better context
25
  if not results:
26
  return "No results found."
27
  return "\n".join([f"{r.get('title')}: {r.get('body')}" for r in results])
 
31
  @tool
32
  def download_and_read_file(task_id: str) -> str:
33
  """
34
+ Downloads the file for a task and returns its content.
35
  Args:
36
+ task_id: The unique ID for the task file.
37
  """
38
  url = f"{DEFAULT_API_URL}/files/{task_id}"
39
  try:
40
+ r = requests.get(url, timeout=30)
41
  r.raise_for_status()
42
  content_type = r.headers.get("content-type", "").lower()
43
 
44
+ if "csv" in content_type or task_id.lower().endswith(".csv"):
 
45
  df = pd.read_csv(io.BytesIO(r.content))
46
+ return f"CSV Content (First 15 rows):\n{df.head(15).to_string()}\n\nColumns: {df.columns.tolist()}"
47
+ elif "text" in content_type or task_id.lower().endswith(".txt"):
48
+ return f"Text Content (Snippet):\n{r.text[:2000]}"
 
 
 
49
  else:
50
+ return f"File downloaded. Size: {len(r.content)} bytes. If this is an image/pdf, use web_search to find related facts about task {task_id}."
51
  except Exception as e:
52
+ return f"Download failed: {str(e)}"
53
 
54
+ # ====================== AGENT ======================
55
 
56
  class GaiaAgent:
57
  def __init__(self):
58
+ # Qwen2.5-Coder is the best choice for reasoning and tool use
59
  self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
60
  self.agent = CodeAgent(
61
  tools=[web_search, download_and_read_file],
62
  model=self.model,
63
+ add_base_tools=True,
64
+ max_steps=12 # Increased for complex multi-step reasoning
65
  )
66
 
67
+ def clean_answer(self, raw_result: str) -> str:
68
+ """Removes conversational filler that fails the GAIA grader."""
69
+ text = str(raw_result).strip()
70
+ # Remove common prefixes
71
+ text = re.sub(r'^(the answer is|final answer|result is)[:\s]*', '', text, flags=re.IGNORECASE)
72
+ # If it's a long sentence ending in a period, just take the last word/number if it looks like a value
73
+ if len(text.split()) > 10:
74
+ return text # Keep it if it's complex, but usually, GAIA wants short strings
75
+ return text.strip(".")
76
+
77
  def __call__(self, question: str, task_id: str) -> str:
78
+ prompt = f"""Task ID: {task_id}
79
+ Question: {question}
80
+
81
+ INSTRUCTIONS:
82
+ 1. Use your tools to find the exact factual answer.
83
+ 2. If the question involves a file, download it first.
84
+ 3. YOUR FINAL ANSWER MUST BE EXTREMELY BRIEF.
85
+ - Example: '1923' or 'Marie Curie' or '4.52'.
86
+ - Do NOT use sentences. Do NOT explain your reasoning in the final answer.
87
+ """
88
  try:
89
+ result = self.agent.run(prompt)
90
+ return self.clean_answer(result)
91
+ except Exception:
92
+ return "Unknown"
 
93
 
94
+ # ====================== MAIN LOGIC ======================
95
 
96
  def run_and_submit_all(profile: gr.OAuthProfile | None):
97
  if not profile:
98
+ return "❌ Error: Please Login with Hugging Face first!", None
99
 
100
  username = profile.username
 
 
 
101
  agent = GaiaAgent()
102
+
 
 
 
103
  try:
104
+ resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
105
+ questions = resp.json()
106
  except Exception as e:
107
  return f"Failed to fetch questions: {e}", None
108
 
109
  answers_payload = []
110
  results_log = []
111
+
112
+ for i, item in enumerate(questions):
113
+ t_id = item.get("task_id")
114
+ q_text = item.get("question")
 
115
 
116
+ print(f"--- Processing {i+1}/20: {t_id} ---")
117
+ answer = agent(q_text, t_id)
118
 
119
+ answers_payload.append({"task_id": t_id, "submitted_answer": str(answer)})
120
+ results_log.append({"Task": t_id, "Answer": str(answer)})
121
+
122
+ # 35s is safe, 40s is bulletproof for rate limits
123
+ time.sleep(38)
124
 
125
+ # FINAL SUBMISSION
126
+ space_id = os.getenv("SPACE_ID", "unknown")
127
  submission_data = {
128
  "username": username,
129
+ "agent_code": f"https://huggingface.co/spaces/{space_id}",
130
  "answers": answers_payload
131
  }
132
 
133
  try:
134
+ r = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=300)
135
+ if r.status_code == 200:
136
+ res = r.json()
137
+ return f"✅ SCORE: {res.get('score', 0)}% | {res.get('message', '')}", pd.DataFrame(results_log)
138
+ else:
139
+ return f"❌ Error {r.status_code}: {r.text}", pd.DataFrame(results_log)
140
  except Exception as e:
141
+ return f"❌ Submission Failed: {str(e)}", pd.DataFrame(results_log)
142
+
143
+ # ====================== UI ======================
144
+
145
+ with gr.Blocks(theme=gr.themes.Default()) as demo:
146
+ gr.Markdown("# 🏆 GAIA Certificate Auto-Submitter (Unit 4)")
147
+ gr.Markdown("Click Login, then Start. Wait 15 mins. Target: 30%+")
148
+
149
+ with gr.Row():
150
+ gr.LoginButton()
151
+ run_btn = gr.Button("🚀 Start Evaluation", variant="primary")
152
+
153
+ status_output = gr.Textbox(label="Final Result", lines=3)
154
+ table_output = gr.DataFrame(label="Attempt Details")
155
+
156
+ run_btn.click(run_and_submit_all, outputs=[status_output, table_output])
157
 
158
  if __name__ == "__main__":
159
  demo.launch()