Karim0111 committed on
Commit
4f72328
Β·
verified Β·
1 Parent(s): eebecd0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -109
app.py CHANGED
@@ -1,157 +1,235 @@
1
- import os
2
- import gradio as gr
3
- import requests
4
  import pandas as pd
5
  import re
6
  from huggingface_hub import InferenceClient
7
- import time
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
- # --- Free GAIA Agent Definition ---
13
- class EnhancedGAIAAgentFree:
 
 
 
14
  """
15
- GAIA Agent for free HuggingFace models.
16
- Returns 'unknown' for images, videos, code, Excel, or unsupported questions.
17
  """
18
 
19
  def __init__(self):
20
- print("πŸš€ GAIAAgent initializing... (FREE version)")
21
- hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
 
 
 
 
 
 
22
  if not hf_token:
23
- print("⚠️ HF_TOKEN not found! Add it to Space secrets.")
24
- self.client = None
25
- self.model = None
26
- return
27
 
28
  self.client = InferenceClient(token=hf_token)
29
- self.model = "TheBloke/guanaco-7B-GPTQ" # free model
 
 
 
 
 
30
  print(f"βœ… Model loaded: {self.model}")
31
 
32
  def __call__(self, question: str) -> str:
33
- """
34
- Answer a question or return 'unknown' for unsupported content.
35
- """
 
 
 
 
 
 
 
 
36
  try:
37
- # Filter unsupported content
38
- unsupported = ["image", "video", "file", "attached", "excel", "python code", "chart", "csv"]
39
- if any(word in question.lower() for word in unsupported):
40
- return "unknown"
41
-
42
- # Very short or empty questions
43
- if not question.strip() or len(question.strip()) < 5:
44
- return "unknown"
45
-
46
- # Free text generation for supported questions
47
- prompt = f"""
48
- You are an expert for GAIA benchmark.
49
- Answer concisely. ONLY provide the final answer.
50
- If you cannot determine the answer, write "unknown".
51
- Question: {question}
52
- FINAL ANSWER:"""
53
-
54
- response = self.client.text_generation(
55
- model=self.model,
56
- prompt=prompt,
57
- max_new_tokens=128,
58
- temperature=0.1,
59
- do_sample=False,
60
- return_full_text=False
61
- )
62
-
63
- # Clean response
64
- answer = response.strip() if response else "unknown"
65
  return answer
66
-
67
  except Exception as e:
68
  print(f"❌ Agent error: {e}")
69
- return "unknown"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
 
 
 
 
 
 
 
72
  def run_and_submit_all(profile: gr.OAuthProfile | None):
73
- """
74
- Run agent on all questions and submit results.
75
- """
76
- space_id = os.getenv("SPACE_ID")
77
- if profile:
78
- username = profile.username
79
- else:
80
- return "Please login to HuggingFace.", None
81
 
82
  questions_url = f"{DEFAULT_API_URL}/questions"
83
  submit_url = f"{DEFAULT_API_URL}/submit"
84
 
85
- # Instantiate Agent
86
- agent = EnhancedGAIAAgentFree()
87
- if not agent.client or not agent.model:
88
- return "⚠️ HF_TOKEN not found! Add it to Space secrets.", None
89
 
90
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
91
 
92
- # Fetch Questions
93
- try:
94
- response = requests.get(questions_url, timeout=15)
95
- response.raise_for_status()
96
- questions_data = response.json()
97
- except Exception as e:
98
- return f"Error fetching questions: {e}", None
99
 
100
- results_log = []
 
 
 
101
  answers_payload = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
- for idx, item in enumerate(questions_data):
104
- task_id = item.get("task_id")
105
- question_text = item.get("question")
106
- if not task_id or question_text is None:
107
- continue
108
 
109
- submitted_answer = agent(question_text)
110
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
111
  results_log.append({
112
  "Task ID": task_id,
113
- "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
114
- "Answer": submitted_answer[:80] + "..." if len(submitted_answer) > 80 else submitted_answer
115
  })
116
 
117
- if not answers_payload:
118
- return "No answers generated.", pd.DataFrame(results_log)
119
-
120
- # Submit Results
121
- submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
122
- try:
123
- response = requests.post(submit_url, json=submission_data, timeout=60)
124
- response.raise_for_status()
125
- result_data = response.json()
126
- final_status = (
127
- f"πŸŽ‰ Submission Successful!\n"
128
- f"User: {result_data.get('username')}\n"
129
- f"Score: {result_data.get('score', 'N/A')}% "
130
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
131
- )
132
- return final_status, pd.DataFrame(results_log)
133
- except Exception as e:
134
- return f"Submission failed: {e}", pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
135
 
136
 
137
- # --- Gradio Interface ---
138
- with gr.Blocks(title="GAIA Agent Evaluation (Free)") as demo:
139
- gr.Markdown("# πŸ€— GAIA Benchmark Agent (Free)")
140
  gr.Markdown(
141
  """
142
- **Setup Required:**
143
- 1. Add HF_TOKEN to Space secrets (Settings β†’ Variables and secrets)
144
- 2. Get free token at: https://huggingface.co/settings/tokens (Read access)
145
- 3. Login with HuggingFace, then click Run Evaluation.
146
- """
 
 
 
 
147
  )
148
 
149
  gr.LoginButton()
150
- run_button = gr.Button("πŸš€ Run Evaluation", variant="primary", size="lg")
151
- status_output = gr.Textbox(label="Status", lines=8, interactive=False)
152
- results_table = gr.DataFrame(label="Results", wrap=True)
 
 
 
 
153
 
154
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
155
 
156
  if __name__ == "__main__":
157
- demo.launch(debug=True, share=False)
 
 
 
 
1
import os
import re

import gradio as gr
import pandas as pd
import requests
from huggingface_hub import InferenceClient
4
+
5
 
6
  # --- Constants ---
7
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
8
 
9
+
10
# =========================
# GAIA OPTIMIZED AGENT
# =========================
class GAIAAgent:
    """GAIA benchmark agent – chat-only, returns exact short answers.

    Uses the HuggingFace ``InferenceClient`` chat-completion API with an
    instruct model and post-processes replies so only the bare answer
    string is submitted (GAIA scoring is exact-match).
    """

    def __init__(self):
        """Read the HF token from the environment and build the client.

        Raises:
            RuntimeError: if no HF token is present in the Space secrets.
        """
        print("🚀 GAIAAgent initializing...")

        # Accept any of the common env-var spellings for the token.
        hf_token = (
            os.getenv("HF_TOKEN")
            or os.getenv("HUGGING_FACE_HUB_TOKEN")
            or os.getenv("HF_API_TOKEN")
        )

        if not hf_token:
            raise RuntimeError("HF_TOKEN not found in Space secrets")

        self.client = InferenceClient(token=hf_token)

        # ✅ SAFE MODELS (chat-only)
        self.model = "meta-llama/Meta-Llama-3-8B-Instruct"
        # Alternative:
        # self.model = "Qwen/Qwen2.5-7B-Instruct"

        print(f"✅ Model loaded: {self.model}")

    def __call__(self, question: str) -> str:
        """Answer one GAIA question; never raises (falls back to a fixed string)."""
        print(f"\nQ: {question[:120]}")

        try:
            answer = self._chat_answer(question)
            print(f"A: {answer}")
            return answer
        except Exception as e:
            # Any API/parse failure degrades to a fixed fallback answer so
            # the evaluation loop keeps running.
            print(f"❌ Agent error: {e}")
            return "Unable to determine answer"

    def _chat_answer(self, question: str) -> str:
        """Query the chat model and return the cleaned answer text."""
        messages = [
            {
                "role": "system",
                "content": (
                    "You are an expert GAIA benchmark solver.\n"
                    "Answer EXACTLY what is asked.\n"
                    "Return ONLY the final answer.\n"
                    "No explanations, no prefixes, no formatting."
                )
            },
            {
                "role": "user",
                "content": question
            }
        ]

        # temperature=0.0 keeps answers deterministic for exact-match scoring.
        response = self.client.chat_completion(
            model=self.model,
            messages=messages,
            max_tokens=256,
            temperature=0.0,
        )

        if not response or not response.choices:
            return "Unable to determine answer"

        raw = response.choices[0].message.content.strip()
        return self._clean_answer(raw)

    def _clean_answer(self, text: str) -> str:
        """GAIA-safe cleaning: minimal, no hallucinated trimming.

        Strips common boilerplate prefixes, keeps only the first line of
        multi-line replies, and for verbose replies keeps the last sentence.
        """
        text = text.strip()

        # Remove common junk prefixes if the model disobeys the system prompt.
        bad_prefixes = [
            "answer:",
            "final answer:",
            "the answer is",
            "result:"
        ]

        for p in bad_prefixes:
            if text.lower().startswith(p):
                text = text[len(p):].strip()

        # If multi-line, keep first meaningful line
        if "\n" in text:
            text = text.split("\n")[0].strip()

        # GAIA prefers concise: if still verbose, keep the LAST sentence.
        # BUG FIX: the previous `parts[-2]` indexing selected the wrong
        # sentence whenever the text had no trailing punctuation, because
        # re.split only leaves an empty tail element after a final ".".
        if len(text.split()) > 12:
            sentences = [s.strip() for s in re.split(r"[.!?]", text) if s.strip()]
            if sentences:
                text = sentences[-1]

        return text
124
+
125
+
126
 
127
 
128
+
129
+
130
+
131
# =========================
# RUN + SUBMIT
# =========================
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every GAIA question and submit the answers.

    Args:
        profile: Gradio OAuth profile injected by the LoginButton, or None.

    Returns:
        Tuple of (status message, results DataFrame or None).
    """
    if not profile:
        return "Please login with Hugging Face.", None

    username = profile.username
    print(f"👤 User: {username}")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # Agent construction raises RuntimeError if the HF token secret is
    # missing — surface that as a status message instead of crashing the UI.
    try:
        agent = GAIAAgent()
    except Exception as e:
        return f"Agent initialization failed: {e}", None

    # Fetch questions; fail gracefully on network/HTTP/JSON errors.
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    answers_payload = []
    results_log = []

    for idx, item in enumerate(questions):
        # Skip malformed entries rather than raising KeyError mid-run.
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue

        print(f"\n[{idx+1}/{len(questions)}] {task_id}")
        answer = agent(question)

        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": answer
        })

        results_log.append({
            "Task ID": task_id,
            "Answer": answer
        })

    if not answers_payload:
        return "No answers generated.", pd.DataFrame(results_log)

    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
        "answers": answers_payload
    }

    # Submit and report the score; surface HTTP/JSON failures as text.
    try:
        response = requests.post(submit_url, json=submission, timeout=60)
        response.raise_for_status()
        result = response.json()
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)

    status = (
        f"🎉 Submission Successful\n\n"
        f"Score: {result.get('score')}%\n"
        f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}"
    )

    return status, pd.DataFrame(results_log)
199
+
200
+
201
+
202
# =========================
# GRADIO UI
# =========================
# Top-level app: a login button plus a run button wired to
# run_and_submit_all, with two outputs (status textbox + results table).
with gr.Blocks(title="GAIA Agent") as demo:
    gr.Markdown("# 🤗 GAIA Benchmark Agent (Fixed)")

    gr.Markdown(
        """
        ✅ Chat-only
        ✅ nscale-safe
        ✅ GAIA-optimized

        **Steps**
        1. Add `HF_TOKEN` to Space secrets
        2. Login with Hugging Face
        3. Click Run
        """
    )

    # OAuth login; Gradio injects the resulting gr.OAuthProfile into the
    # click handler automatically (no explicit inputs needed below).
    gr.LoginButton()
    run_btn = gr.Button("🚀 Run Evaluation", variant="primary")

    status = gr.Textbox(label="Status", lines=6)
    table = gr.DataFrame(label="Results")

    run_btn.click(run_and_submit_all, outputs=[status, table])


if __name__ == "__main__":
    demo.launch(debug=True)