Raj989898 committed on
Commit
f04e43e
·
verified ·
1 Parent(s): 2058452

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -88
app.py CHANGED
@@ -4,24 +4,29 @@ import requests
4
  import pandas as pd
5
  import tempfile
6
  import traceback
 
 
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
  # --- File helpers ---
11
  def download_task_file(task_id: str):
 
12
  url = f"{DEFAULT_API_URL}/files/{task_id}"
13
  try:
14
  resp = requests.get(url, timeout=30)
15
  if resp.status_code != 200:
 
16
  return None, None
17
  cd = resp.headers.get("content-disposition", "")
18
  fname = "task_file"
19
  if "filename=" in cd:
20
- fname = cd.split("filename=")[-1].strip().strip('"')
21
  ext = os.path.splitext(fname)[-1] or ".bin"
22
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
23
  tmp.write(resp.content)
24
  tmp.close()
 
25
  return tmp.name, fname
26
  except Exception as e:
27
  print(f"File download error: {e}")
@@ -39,11 +44,32 @@ def read_file_contents(local_path: str, fname: str) -> str:
39
  elif ext in (".py", ".txt", ".md", ".json"):
40
  with open(local_path) as f:
41
  return f.read()
 
 
42
  else:
43
- return f"Binary file: {fname}"
 
 
 
 
 
44
  except Exception as e:
45
  return f"Error reading file: {e}"
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def clean_answer(text: str) -> str:
48
  text = text.strip()
49
  for prefix in ["FINAL ANSWER:", "Final Answer:", "Answer:",
@@ -53,69 +79,59 @@ def clean_answer(text: str) -> str:
53
  text = text[len(prefix):].strip()
54
  return text.split("\n")[0].strip().strip('"').strip("'").strip("*").strip()
55
 
56
- # --- Groq API call (free tier, very fast) ---
57
- def call_groq(api_key: str, prompt: str, system: str = "") -> str:
58
  url = "https://api.groq.com/openai/v1/chat/completions"
59
- headers = {
60
- "Authorization": f"Bearer {api_key}",
61
- "Content-Type": "application/json"
62
- }
63
  messages = []
64
  if system:
65
  messages.append({"role": "system", "content": system})
66
  messages.append({"role": "user", "content": prompt})
67
-
68
- body = {
69
- "model": "llama-3.3-70b-versatile",
70
- "messages": messages,
71
- "temperature": 0.0,
72
- "max_tokens": 512,
73
- }
74
  resp = requests.post(url, headers=headers, json=body, timeout=60)
75
  print(f"Groq status: {resp.status_code}")
76
  if resp.status_code != 200:
77
  print(f"Groq error: {resp.text[:400]}")
78
  raise Exception(f"Groq API error {resp.status_code}: {resp.text[:200]}")
79
- data = resp.json()
80
- return data["choices"][0]["message"]["content"].strip()
81
-
82
- # --- Test function ---
83
- def test_api():
84
- api_key = os.getenv("GROQ_API_KEY", "")
85
- if not api_key:
86
- return "❌ GROQ_API_KEY is NOT set in Space Secrets!\nGet a free key at https://console.groq.com"
87
- try:
88
- answer = call_groq(api_key, "What is 2+2? Reply with just the number.",
89
- "Reply with only the bare answer.")
90
- return f"βœ… Groq API working! Test answer: '{answer}'"
91
- except Exception as e:
92
- return f"❌ Groq failed: {e}"
93
 
94
  # --- Web search ---
95
- def search_web(query: str) -> str:
96
  try:
97
  from duckduckgo_search import DDGS
98
  with DDGS() as ddgs:
99
- results = list(ddgs.text(query, max_results=5))
100
  if not results:
101
  return "No results found."
102
  return "\n\n".join(
103
- f"Title: {r.get('title','')}\nSnippet: {r.get('body','')}"
104
  for r in results
105
  )
106
  except Exception as e:
107
  return f"Search error: {e}"
108
 
109
- # --- Agent ---
110
- SYSTEM_PROMPT = """You are an expert AI agent solving GAIA benchmark questions.
 
 
 
 
 
 
 
111
 
112
- RULES:
113
- - Reply with ONLY the final answer. No explanation, no preamble.
114
- - No "The answer is", no "FINAL ANSWER", no prefixes at all.
115
- - Give the shortest correct answer: a name, number, word, or short phrase.
116
- - Exact match grading is used β€” precision matters.
117
- - For numbers: use digits unless words are specifically asked for.
118
- - For lists: comma-separated values.
 
 
 
 
119
  """
120
 
121
  class BasicAgent:
@@ -126,8 +142,7 @@ class BasicAgent:
126
  "GROQ_API_KEY not set!\n"
127
  "1. Go to https://console.groq.com β†’ free account β†’ API Keys β†’ Create key\n"
128
  "2. Space Settings β†’ Variables and Secrets β†’ New Secret\n"
129
- " Name: GROQ_API_KEY Value: your key\n"
130
- "3. Restart Space"
131
  )
132
  print(f"BasicAgent ready. Key: {self.api_key[:8]}...")
133
 
@@ -141,68 +156,87 @@ class BasicAgent:
141
 
142
  print(f"\n{'='*50}\nTask: {task_id}\nQ: {question[:200]}")
143
 
144
- # 1. Download attached file
145
  file_context = ""
146
- if task_id:
147
- local_path, fname = download_task_file(task_id)
148
- if local_path:
149
- contents = read_file_contents(local_path, fname)
150
- file_context = f"\n\n[Attached file: {fname}]\n{contents[:4000]}\n[End of file]\n"
151
- print(f"Got file: {fname}")
152
-
153
- # 2. Execute Python if it's a code file
154
  code_output = ""
155
- if task_id and file_context and fname.endswith(".py"):
156
- try:
157
- import subprocess, sys
158
- result = subprocess.run(
159
- [sys.executable, local_path],
160
- capture_output=True, text=True, timeout=15
161
- )
162
- code_output = f"\n\n[Python execution output]\n{result.stdout}\n{result.stderr}\n[End output]\n"
163
- print(f"Code output: {result.stdout[:200]}")
164
- except Exception as e:
165
- code_output = f"\n[Code execution error: {e}]\n"
166
 
167
- # 3. Web search for factual questions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  search_context = ""
169
- if not file_context:
170
- print("Searching web...")
171
- results = search_web(question[:200])
172
- if results and "error" not in results.lower():
173
- search_context = f"\n\n[Web search results]\n{results[:2500]}\n[End search]\n"
174
-
175
- # 4. Build prompt and ask Groq
 
 
 
 
 
 
 
 
 
 
 
176
  prompt = (
177
  f"Question: {question}"
178
  f"{file_context}"
179
- f"{code_output}"
180
  f"{search_context}"
181
- "\n\nProvide ONLY the final answer. Nothing else."
182
  )
183
 
184
  try:
185
- answer = call_groq(self.api_key, prompt, SYSTEM_PROMPT)
186
  print(f"Raw answer: '{answer}'")
187
 
188
- # If too verbose, ask to extract
189
- if len(answer.split()) > 20:
190
  answer = call_groq(
191
  self.api_key,
192
- f"From this text, extract ONLY the shortest possible final answer "
193
- f"(name, number, or phrase). Nothing else:\n\n{answer}",
194
- "Reply with only the bare answer."
 
195
  )
196
  print(f"Extracted: '{answer}'")
197
 
198
  answer = clean_answer(answer)
199
  print(f"Final: '{answer}'")
200
  return answer
 
201
  except Exception as e:
202
  print(f"Agent error: {e}\n{traceback.format_exc()}")
203
  return ""
204
 
205
- # --- Submit function ---
206
  def run_and_submit_all(profile: gr.OAuthProfile | None):
207
  space_id = os.getenv("SPACE_ID")
208
  if not profile:
@@ -215,8 +249,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
215
  agent = BasicAgent()
216
  except RuntimeError as e:
217
  return f"❌ {e}", None
218
- except Exception as e:
219
- return f"Error initializing agent: {e}", None
220
 
221
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
222
 
@@ -276,13 +308,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
276
  with gr.Blocks() as demo:
277
  gr.Markdown("# Basic Agent Evaluation Runner")
278
  gr.Markdown("""
279
- **One-time FREE setup:**
280
- 1. Go to [console.groq.com](https://console.groq.com) β†’ Sign up free β†’ API Keys β†’ **Create API Key**
281
- 2. In your Space β†’ **Settings β†’ Variables and Secrets β†’ New Secret**
282
- - Name: `GROQ_API_KEY` | Value: paste your key
283
- 3. Restart Space, then click **Test** below to confirm it works
284
  """)
285
-
286
  gr.LoginButton()
287
 
288
  with gr.Row():
 
4
  import pandas as pd
5
  import tempfile
6
  import traceback
7
+ import subprocess
8
+ import sys
9
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # --- File helpers ---
13
  def download_task_file(task_id: str):
14
+ """Returns (local_path, filename) or (None, None)."""
15
  url = f"{DEFAULT_API_URL}/files/{task_id}"
16
  try:
17
  resp = requests.get(url, timeout=30)
18
  if resp.status_code != 200:
19
+ print(f"No file for task {task_id}: HTTP {resp.status_code}")
20
  return None, None
21
  cd = resp.headers.get("content-disposition", "")
22
  fname = "task_file"
23
  if "filename=" in cd:
24
+ fname = cd.split("filename=")[-1].strip().strip('"').strip("'")
25
  ext = os.path.splitext(fname)[-1] or ".bin"
26
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
27
  tmp.write(resp.content)
28
  tmp.close()
29
+ print(f"Downloaded file: {fname} -> {tmp.name} ({len(resp.content)} bytes)")
30
  return tmp.name, fname
31
  except Exception as e:
32
  print(f"File download error: {e}")
 
44
  elif ext in (".py", ".txt", ".md", ".json"):
45
  with open(local_path) as f:
46
  return f.read()
47
+ elif ext in (".png", ".jpg", ".jpeg", ".gif", ".webp"):
48
+ return f"[IMAGE FILE: {fname}] - This is an image that needs visual analysis."
49
  else:
50
+ # Try reading as text anyway
51
+ try:
52
+ with open(local_path) as f:
53
+ return f.read()
54
+ except:
55
+ return f"Binary file: {fname} ({ext})"
56
  except Exception as e:
57
  return f"Error reading file: {e}"
58
 
59
def run_python_file(local_path: str, timeout: float = 15) -> str:
    """Run a Python script in a child interpreter and return what it printed.

    Args:
        local_path: Path of the script to execute.
        timeout: Seconds to wait before the child process is abandoned.

    Returns:
        The script's stdout followed by its stderr, stripped of surrounding
        whitespace; "No output produced." when both are empty;
        "Code execution timed out." when the timeout elapses; or
        "Execution error: <details>" for any other failure. Never raises.
    """
    # NOTE(security): the script is executed with this process's privileges.
    # Callers pass files downloaded from a remote service, so only use this
    # in an environment where running that code is acceptable.
    try:
        result = subprocess.run(
            [sys.executable, local_path],
            capture_output=True,
            text=True,
            # Undecodable bytes in the child's output must not abort the run.
            errors="replace",
            timeout=timeout,
        )
        output = result.stdout + result.stderr
        print(f"Python output: {output[:300]}")
        return output.strip() or "No output produced."
    except subprocess.TimeoutExpired:
        return "Code execution timed out."
    except Exception as e:
        return f"Execution error: {e}"
72
+
73
  def clean_answer(text: str) -> str:
74
  text = text.strip()
75
  for prefix in ["FINAL ANSWER:", "Final Answer:", "Answer:",
 
79
  text = text[len(prefix):].strip()
80
  return text.split("\n")[0].strip().strip('"').strip("'").strip("*").strip()
81
 
82
+ # --- Groq API ---
83
def call_groq(api_key: str, prompt: str, system: str = "", max_tokens: int = 512,
              model: str = "llama-3.3-70b-versatile") -> str:
    """Send one chat-completion request to Groq and return the reply text.

    Args:
        api_key: Bearer token placed in the Authorization header.
        prompt: Content of the user message.
        system: Optional system message; omitted from the request when empty.
        max_tokens: Upper bound on generated tokens sent in the request body.
        model: Model identifier sent in the request body.

    Returns:
        The first choice's message content with surrounding whitespace
        removed (an empty string if the API returned no content).

    Raises:
        RuntimeError: On a non-200 HTTP status or a response body that does
            not have the expected ``choices[0].message.content`` shape.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})
    body = {
        "model": model,
        "messages": messages,
        "temperature": 0.0,
        "max_tokens": max_tokens,
    }
    resp = requests.post(url, headers=headers, json=body, timeout=60)
    print(f"Groq status: {resp.status_code}")
    if resp.status_code != 200:
        print(f"Groq error: {resp.text[:400]}")
        raise RuntimeError(f"Groq API error {resp.status_code}: {resp.text[:200]}")
    try:
        content = resp.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError, ValueError) as err:
        # A 200 with an unexpected body would otherwise surface as an opaque
        # KeyError/IndexError far from the cause.
        raise RuntimeError(
            f"Groq API returned an unexpected payload: {resp.text[:200]}"
        ) from err
    # content may be null in the JSON; treat that as an empty reply.
    return (content or "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  # --- Web search ---
100
def search_web(query: str, max_results: int = 6) -> str:
    """Run a DuckDuckGo text search and format the hits as plain text.

    Args:
        query: Search string. A blank query returns "No results found."
            without contacting the search backend.
        max_results: Maximum number of hits requested.

    Returns:
        One "Title / Snippet / URL" paragraph per hit, separated by blank
        lines; "No results found." when there are no hits; or
        "Search error: <details>" if anything fails. Never raises.
    """
    if not query or not query.strip():
        return "No results found."
    try:
        # Imported lazily so a missing package degrades to a "Search error"
        # string instead of breaking module import.
        from duckduckgo_search import DDGS
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=max_results))
        if not results:
            return "No results found."
        # `or ''` also covers keys that are present but None.
        return "\n\n".join(
            f"Title: {r.get('title') or ''}\n"
            f"Snippet: {r.get('body') or ''}\n"
            f"URL: {r.get('href') or ''}"
            for r in results
        )
    except Exception as e:
        return f"Search error: {e}"
113
 
114
def test_api() -> str:
    """Check that the Groq key is configured and the API answers.

    Reads ``GROQ_API_KEY`` from the environment and sends a trivial prompt
    through ``call_groq``.

    Returns:
        A human-readable status line: a "not set" message when the variable
        is missing or blank, a success message quoting the model's reply, or
        a failure message carrying the exception text. Never raises.
    """
    api_key = os.getenv("GROQ_API_KEY", "")
    # A whitespace-only secret is as unusable as a missing one.
    if not api_key.strip():
        return "❌ GROQ_API_KEY is NOT set in Space Secrets!"
    try:
        answer = call_groq(
            api_key,
            "What is 2+2? Reply with just the number.",
            "Reply with only the bare answer.",
        )
        return f"✅ Groq API working! Test answer: '{answer}'"
    except Exception as e:
        return f"❌ Groq failed: {e}"
123
 
124
+ # --- System prompt ---
125
# System message sent with every question; it pins the answer format because
# the prompt itself states that exact-match grading is used.
SYSTEM_PROMPT = """You are an expert AI agent solving GAIA benchmark questions. Exact match grading is used.

CRITICAL RULES:
1. Reply with ONLY the final answer — no explanation, no preamble, no prefix like "The answer is"
2. Be as concise as possible: just the name, number, word, or short phrase
3. For numbers: use digits (e.g. "42") unless words are specifically requested
4. For currency: strip $ signs and commas unless format is specifically asked for (e.g. "1234.56" not "$1,234.56")
5. For lists: use comma-separated values with no extra words
6. For names: give full name in the exact format requested (first name only if asked for first name)
7. Think carefully — precision matters for exact matching
"""
136
 
137
  class BasicAgent:
 
142
  "GROQ_API_KEY not set!\n"
143
  "1. Go to https://console.groq.com β†’ free account β†’ API Keys β†’ Create key\n"
144
  "2. Space Settings β†’ Variables and Secrets β†’ New Secret\n"
145
+ " Name: GROQ_API_KEY Value: your key"
 
146
  )
147
  print(f"BasicAgent ready. Key: {self.api_key[:8]}...")
148
 
 
156
 
157
  print(f"\n{'='*50}\nTask: {task_id}\nQ: {question[:200]}")
158
 
 
159
  file_context = ""
 
 
 
 
 
 
 
 
160
  code_output = ""
161
+ local_path = None
162
+ fname = None
 
 
 
 
 
 
 
 
 
163
 
164
+ # 1. Always try to download file for every task
165
+ if task_id:
166
+ local_path, fname = download_task_file(task_id)
167
+ if local_path and fname:
168
+ ext = os.path.splitext(fname)[-1].lower()
169
+
170
+ if ext == ".py":
171
+ # Run Python code and capture output
172
+ code_output_text = run_python_file(local_path)
173
+ file_contents = read_file_contents(local_path, fname)
174
+ file_context = (
175
+ f"\n\n[Python file: {fname}]\n"
176
+ f"CODE:\n{file_contents}\n\n"
177
+ f"EXECUTION OUTPUT:\n{code_output_text}\n"
178
+ f"[End of file]\n"
179
+ )
180
+ elif ext in (".xlsx", ".xls", ".csv"):
181
+ contents = read_file_contents(local_path, fname)
182
+ file_context = f"\n\n[Data file: {fname}]\n{contents[:5000]}\n[End of file]\n"
183
+ elif ext in (".png", ".jpg", ".jpeg"):
184
+ file_context = f"\n\n[Note: An image file '{fname}' is attached but cannot be displayed in text. Use your knowledge to answer based on the question context.]\n"
185
+ else:
186
+ contents = read_file_contents(local_path, fname)
187
+ file_context = f"\n\n[Attached file: {fname}]\n{contents[:4000]}\n[End of file]\n"
188
+
189
+ # 2. Web search β€” always search unless we have a code execution result
190
  search_context = ""
191
+ has_code_answer = local_path and fname and os.path.splitext(fname)[-1].lower() == ".py"
192
+
193
+ if not has_code_answer:
194
+ # Build a focused search query
195
+ search_query = question[:200]
196
+ print(f"Searching: {search_query[:80]}...")
197
+ results = search_web(search_query)
198
+ if results and "error" not in results.lower() and "No results" not in results:
199
+ search_context = f"\n\n[Web search results]\n{results[:3000]}\n[End search]\n"
200
+
201
+ # 3. Special handling for reversed text question
202
+ if "rewsna" in question or "dnatsrednu" in question:
203
+ # This is a reversed text question β€” reverse it first
204
+ reversed_q = question[::-1]
205
+ print(f"Reversed question: {reversed_q}")
206
+ question = reversed_q
207
+
208
+ # 4. Build prompt
209
  prompt = (
210
  f"Question: {question}"
211
  f"{file_context}"
 
212
  f"{search_context}"
213
+ "\n\nProvide ONLY the final answer. No explanation. No prefix."
214
  )
215
 
216
  try:
217
+ answer = call_groq(self.api_key, prompt, SYSTEM_PROMPT, max_tokens=256)
218
  print(f"Raw answer: '{answer}'")
219
 
220
+ # If too verbose, extract key part
221
+ if len(answer.split()) > 25:
222
  answer = call_groq(
223
  self.api_key,
224
+ f"From this response, extract ONLY the shortest final answer "
225
+ f"(name, number, or brief phrase). Nothing else:\n\n{answer}",
226
+ "Reply with only the bare answer. No explanation.",
227
+ max_tokens=64
228
  )
229
  print(f"Extracted: '{answer}'")
230
 
231
  answer = clean_answer(answer)
232
  print(f"Final: '{answer}'")
233
  return answer
234
+
235
  except Exception as e:
236
  print(f"Agent error: {e}\n{traceback.format_exc()}")
237
  return ""
238
 
239
+ # --- Submit ---
240
  def run_and_submit_all(profile: gr.OAuthProfile | None):
241
  space_id = os.getenv("SPACE_ID")
242
  if not profile:
 
249
  agent = BasicAgent()
250
  except RuntimeError as e:
251
  return f"❌ {e}", None
 
 
252
 
253
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
254
 
 
308
  with gr.Blocks() as demo:
309
  gr.Markdown("# Basic Agent Evaluation Runner")
310
  gr.Markdown("""
311
+ **Setup:** Add `GROQ_API_KEY` in Space Settings β†’ Variables and Secrets β†’ New Secret.
312
+ Free key at [console.groq.com](https://console.groq.com)
 
 
 
313
  """)
 
314
  gr.LoginButton()
315
 
316
  with gr.Row():