wahibtim committed on
Commit
466c18b
·
verified ·
1 Parent(s): b6de177

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -42
app.py CHANGED
@@ -5,7 +5,7 @@ import pandas as pd
5
  import time
6
  import io
7
  import re
8
- from smolagents import HfApiModel, tool, CodeAgent
9
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
@@ -21,7 +21,7 @@ def web_search(query: str) -> str:
21
  try:
22
  from duckduckgo_search import DDGS
23
  with DDGS() as ddgs:
24
- results = list(ddgs.text(query, max_results=5)) # Increased results for better context
25
  if not results:
26
  return "No results found."
27
  return "\n".join([f"{r.get('title')}: {r.get('body')}" for r in results])
@@ -40,14 +40,17 @@ def download_and_read_file(task_id: str) -> str:
40
  r = requests.get(url, timeout=30)
41
  r.raise_for_status()
42
  content_type = r.headers.get("content-type", "").lower()
43
-
44
  if "csv" in content_type or task_id.lower().endswith(".csv"):
45
  df = pd.read_csv(io.BytesIO(r.content))
46
  return f"CSV Content (First 15 rows):\n{df.head(15).to_string()}\n\nColumns: {df.columns.tolist()}"
47
  elif "text" in content_type or task_id.lower().endswith(".txt"):
48
  return f"Text Content (Snippet):\n{r.text[:2000]}"
49
  else:
50
- return f"File downloaded. Size: {len(r.content)} bytes. If this is an image/pdf, use web_search to find related facts about task {task_id}."
 
 
 
51
  except Exception as e:
52
  return f"Download failed: {str(e)}"
53
 
@@ -55,105 +58,136 @@ def download_and_read_file(task_id: str) -> str:
55
 
56
  class GaiaAgent:
57
  def __init__(self):
58
- # Qwen2.5-Coder is the best choice for reasoning and tool use
59
- self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 
 
 
 
 
 
 
 
60
  self.agent = CodeAgent(
61
  tools=[web_search, download_and_read_file],
62
  model=self.model,
63
  add_base_tools=True,
64
- max_steps=12 # Increased for complex multi-step reasoning
65
  )
66
 
67
  def clean_answer(self, raw_result: str) -> str:
68
  """Removes conversational filler that fails the GAIA grader."""
69
  text = str(raw_result).strip()
70
- # Remove common prefixes
71
- text = re.sub(r'^(the answer is|final answer|result is)[:\s]*', '', text, flags=re.IGNORECASE)
72
- # If it's a long sentence ending in a period, just take the last word/number if it looks like a value
73
- if len(text.split()) > 10:
74
- return text # Keep it if it's complex, but usually, GAIA wants short strings
75
- return text.strip(".")
 
 
76
 
77
  def __call__(self, question: str, task_id: str) -> str:
78
  prompt = f"""Task ID: {task_id}
79
  Question: {question}
80
 
81
  INSTRUCTIONS:
82
- 1. Use your tools to find the exact factual answer.
83
- 2. If the question involves a file, download it first.
84
- 3. YOUR FINAL ANSWER MUST BE EXTREMELY BRIEF.
85
- - Example: '1923' or 'Marie Curie' or '4.52'.
86
- - Do NOT use sentences. Do NOT explain your reasoning in the final answer.
 
 
 
 
87
  """
88
  try:
89
  result = self.agent.run(prompt)
90
- return self.clean_answer(result)
91
- except Exception:
 
92
  return "Unknown"
93
 
94
  # ====================== MAIN LOGIC ======================
95
 
96
  def run_and_submit_all(profile: gr.OAuthProfile | None):
97
  if not profile:
98
- return "❌ Error: Please Login with Hugging Face first!", None
99
 
100
  username = profile.username
101
- agent = GaiaAgent()
102
-
 
 
 
 
 
103
  try:
104
  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
 
105
  questions = resp.json()
106
  except Exception as e:
107
- return f"Failed to fetch questions: {e}", None
 
 
108
 
109
  answers_payload = []
110
  results_log = []
111
-
112
  for i, item in enumerate(questions):
113
  t_id = item.get("task_id")
114
  q_text = item.get("question")
115
-
116
- print(f"--- Processing {i+1}/20: {t_id} ---")
 
 
117
  answer = agent(q_text, t_id)
118
-
 
119
  answers_payload.append({"task_id": t_id, "submitted_answer": str(answer)})
120
- results_log.append({"Task": t_id, "Answer": str(answer)})
121
-
122
- # 35s is safe, 40s is bulletproof for rate limits
123
- time.sleep(38)
124
 
125
- # FINAL SUBMISSION
 
 
 
126
  space_id = os.getenv("SPACE_ID", "unknown")
127
  submission_data = {
128
  "username": username,
129
  "agent_code": f"https://huggingface.co/spaces/{space_id}",
130
- "answers": answers_payload
131
  }
132
 
133
  try:
134
  r = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=300)
135
  if r.status_code == 200:
136
  res = r.json()
137
- return f"βœ… SCORE: {res.get('score', 0)}% | {res.get('message', '')}", pd.DataFrame(results_log)
 
 
138
  else:
139
- return f"❌ Error {r.status_code}: {r.text}", pd.DataFrame(results_log)
140
  except Exception as e:
141
  return f"❌ Submission Failed: {str(e)}", pd.DataFrame(results_log)
142
 
143
  # ====================== UI ======================
144
 
145
  with gr.Blocks(theme=gr.themes.Default()) as demo:
146
- gr.Markdown("# πŸ† GAIA Certificate Auto-Submitter (Unit 4)")
147
- gr.Markdown("Click Login, then Start. Wait 15 mins. Target: 30%+")
148
-
 
 
 
149
  with gr.Row():
150
  gr.LoginButton()
151
  run_btn = gr.Button("πŸš€ Start Evaluation", variant="primary")
152
-
153
  status_output = gr.Textbox(label="Final Result", lines=3)
154
- table_output = gr.DataFrame(label="Attempt Details")
155
 
156
- run_btn.click(run_and_submit_all, outputs=[status_output, table_output])
157
 
158
  if __name__ == "__main__":
159
  demo.launch()
 
5
  import time
6
  import io
7
  import re
8
+ from smolagents import LiteLLMModel, tool, CodeAgent
9
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
 
21
  try:
22
  from duckduckgo_search import DDGS
23
  with DDGS() as ddgs:
24
+ results = list(ddgs.text(query, max_results=5))
25
  if not results:
26
  return "No results found."
27
  return "\n".join([f"{r.get('title')}: {r.get('body')}" for r in results])
 
40
  r = requests.get(url, timeout=30)
41
  r.raise_for_status()
42
  content_type = r.headers.get("content-type", "").lower()
43
+
44
  if "csv" in content_type or task_id.lower().endswith(".csv"):
45
  df = pd.read_csv(io.BytesIO(r.content))
46
  return f"CSV Content (First 15 rows):\n{df.head(15).to_string()}\n\nColumns: {df.columns.tolist()}"
47
  elif "text" in content_type or task_id.lower().endswith(".txt"):
48
  return f"Text Content (Snippet):\n{r.text[:2000]}"
49
  else:
50
+ return (
51
+ f"File downloaded. Size: {len(r.content)} bytes. "
52
+ f"If this is an image/pdf, use web_search to find related facts about task {task_id}."
53
+ )
54
  except Exception as e:
55
  return f"Download failed: {str(e)}"
56
 
 
58
 
59
  class GaiaAgent:
60
  def __init__(self):
61
+ groq_api_key = os.getenv("GROQ_API_KEY")
62
+ if not groq_api_key:
63
+ raise ValueError("❌ GROQ_API_KEY secret is not set! Add it in HF Spaces β†’ Settings β†’ Secrets.")
64
+
65
+ # llama-3.3-70b-versatile is the best free model on Groq for reasoning
66
+ self.model = LiteLLMModel(
67
+ model_id="groq/llama-3.3-70b-versatile",
68
+ api_key=groq_api_key,
69
+ )
70
+
71
  self.agent = CodeAgent(
72
  tools=[web_search, download_and_read_file],
73
  model=self.model,
74
  add_base_tools=True,
75
+ max_steps=12,
76
  )
77
 
78
  def clean_answer(self, raw_result: str) -> str:
79
  """Removes conversational filler that fails the GAIA grader."""
80
  text = str(raw_result).strip()
81
+ # Remove common prefixes like "The answer is:"
82
+ text = re.sub(
83
+ r'^(the answer is|final answer|result is|answer)[:\s]*',
84
+ '', text, flags=re.IGNORECASE
85
+ )
86
+ # Strip trailing punctuation
87
+ text = text.strip(".").strip()
88
+ return text
89
 
90
  def __call__(self, question: str, task_id: str) -> str:
91
  prompt = f"""Task ID: {task_id}
92
  Question: {question}
93
 
94
  INSTRUCTIONS:
95
+ - Use your tools to find the exact factual answer.
96
+ - If the question mentions a file or attachment, call download_and_read_file("{task_id}") first.
97
+ - If you need up-to-date facts, use web_search.
98
+ - YOUR FINAL ANSWER MUST BE EXTREMELY BRIEF AND EXACT:
99
+ * Numbers: just the number, e.g. '42' or '4.52'
100
+ * Names: just the name, e.g. 'Marie Curie'
101
+ * Dates: just the date, e.g. '1923' or 'July 4, 1776'
102
+ * Lists: comma-separated, e.g. 'apple, banana, cherry'
103
+ - Do NOT write sentences. Do NOT explain. Just the answer.
104
  """
105
  try:
106
  result = self.agent.run(prompt)
107
+ return self.clean_answer(str(result))
108
+ except Exception as e:
109
+ print(f"Agent error on task {task_id}: {e}")
110
  return "Unknown"
111
 
112
  # ====================== MAIN LOGIC ======================
113
 
114
  def run_and_submit_all(profile: gr.OAuthProfile | None):
115
  if not profile:
116
+ return "❌ Please Login with Hugging Face first!", None
117
 
118
  username = profile.username
119
+ print(f"βœ… Logged in as: {username}")
120
+
121
+ try:
122
+ agent = GaiaAgent()
123
+ except ValueError as e:
124
+ return str(e), None
125
+
126
  try:
127
  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
128
+ resp.raise_for_status()
129
  questions = resp.json()
130
  except Exception as e:
131
+ return f"❌ Failed to fetch questions: {e}", None
132
+
133
+ print(f"πŸ“‹ Fetched {len(questions)} questions.")
134
 
135
  answers_payload = []
136
  results_log = []
137
+
138
  for i, item in enumerate(questions):
139
  t_id = item.get("task_id")
140
  q_text = item.get("question")
141
+
142
+ print(f"\n--- [{i+1}/{len(questions)}] Task: {t_id} ---")
143
+ print(f"Q: {q_text[:120]}...")
144
+
145
  answer = agent(q_text, t_id)
146
+ print(f"A: {answer}")
147
+
148
  answers_payload.append({"task_id": t_id, "submitted_answer": str(answer)})
149
+ results_log.append({"Task ID": t_id, "Question": q_text[:80], "Answer": str(answer)})
 
 
 
150
 
151
+ # Small sleep β€” Groq free tier allows ~30 req/min, no need for 38s waits
152
+ time.sleep(3)
153
+
154
+ # ===== SUBMIT =====
155
  space_id = os.getenv("SPACE_ID", "unknown")
156
  submission_data = {
157
  "username": username,
158
  "agent_code": f"https://huggingface.co/spaces/{space_id}",
159
+ "answers": answers_payload,
160
  }
161
 
162
  try:
163
  r = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=300)
164
  if r.status_code == 200:
165
  res = r.json()
166
+ score = res.get("score", 0)
167
+ message = res.get("message", "")
168
+ return f"βœ… SCORE: {score}% | {message}", pd.DataFrame(results_log)
169
  else:
170
+ return f"❌ Submission Error {r.status_code}: {r.text}", pd.DataFrame(results_log)
171
  except Exception as e:
172
  return f"❌ Submission Failed: {str(e)}", pd.DataFrame(results_log)
173
 
174
  # ====================== UI ======================
175
 
176
  with gr.Blocks(theme=gr.themes.Default()) as demo:
177
+ gr.Markdown("# πŸ† GAIA Certificate Agent (Unit 4 Final)")
178
+ gr.Markdown(
179
+ "**Steps:** 1) Login with HF below β†’ 2) Click Start β†’ 3) Wait ~5 mins β†’ 4) Check your score!\n\n"
180
+ "> Make sure `GROQ_API_KEY` is set in your Space **Settings β†’ Secrets**."
181
+ )
182
+
183
  with gr.Row():
184
  gr.LoginButton()
185
  run_btn = gr.Button("πŸš€ Start Evaluation", variant="primary")
186
+
187
  status_output = gr.Textbox(label="Final Result", lines=3)
188
+ table_output = gr.DataFrame(label="Answer Log")
189
 
190
+ run_btn.click(fn=run_and_submit_all, outputs=[status_output, table_output])
191
 
192
  if __name__ == "__main__":
193
  demo.launch()