Karim0111 committed on
Commit
126c45b
·
verified ·
1 Parent(s): 4f72328

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -214
app.py CHANGED
@@ -1,235 +1,126 @@
 
 
 
1
  import pandas as pd
2
- import re
3
- from huggingface_hub import InferenceClient
4
-
5
 
6
  # --- Constants ---
7
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
8
 
9
-
10
- # =========================
11
- # GAIA OPTIMIZED AGENT
12
- # =========================
13
- class GAIAAgent:
14
- """
15
- GAIA benchmark agent chat-only, nscale-safe, exact answers.
16
-
17
- """
18
-
19
- def __init__(self):
20
- print("🚀 GAIAAgent initializing...")
21
-
22
- hf_token = (
23
- os.getenv("HF_TOKEN")
24
- or os.getenv("HUGGING_FACE_HUB_TOKEN")
25
- or os.getenv("HF_API_TOKEN")
26
- )
27
-
28
- if not hf_token:
29
- raise RuntimeError("HF_TOKEN not found in Space secrets")
30
-
31
-
32
-
33
-
34
- self.client = InferenceClient(token=hf_token)
35
-
36
- # ✅ SAFE MODELS (chat-only)
37
- self.model = "meta-llama/Meta-Llama-3-8B-Instruct"
38
- # Alternative:
39
- # self.model = "Qwen/Qwen2.5-7B-Instruct"
40
-
41
- print(f"✅ Model loaded: {self.model}")
42
 
43
  def __call__(self, question: str) -> str:
44
- print(f"\nQ: {question[:120]}")
45
-
46
-
47
-
48
-
49
-
50
-
51
-
52
-
53
-
54
-
55
  try:
56
- answer = self._chat_answer(question)
57
- print(f"A: {answer}")
58
- return answer
 
 
 
 
59
  except Exception as e:
60
- print(f" Agent error: {e}")
61
- return "Unable to determine answer"
62
-
63
- def _chat_answer(self, question: str) -> str:
64
- messages = [
65
- {
66
- "role": "system",
67
- "content": (
68
- "You are an expert GAIA benchmark solver.\n"
69
- "Answer EXACTLY what is asked.\n"
70
- "Return ONLY the final answer.\n"
71
- "No explanations, no prefixes, no formatting."
72
- )
73
- },
74
- {
75
- "role": "user",
76
- "content": question
77
- }
78
- ]
79
-
80
- response = self.client.chat_completion(
81
- model=self.model,
82
- messages=messages,
83
- max_tokens=256,
84
- temperature=0.0,
85
-
86
-
87
- )
88
-
89
- if not response or not response.choices:
90
- return "Unable to determine answer"
91
-
92
- raw = response.choices[0].message.content.strip()
93
- return self._clean_answer(raw)
94
-
95
- def _clean_answer(self, text: str) -> str:
96
- """
97
- GAIA-safe cleaning: minimal, no hallucinated trimming.
98
- """
99
- text = text.strip()
100
-
101
- # Remove common junk if model disobeys
102
- bad_prefixes = [
103
- "answer:",
104
- "final answer:",
105
- "the answer is",
106
- "result:"
107
- ]
108
-
109
- for p in bad_prefixes:
110
- if text.lower().startswith(p):
111
- text = text[len(p):].strip()
112
-
113
- # If multi-line, keep first meaningful line
114
- if "\n" in text:
115
- text = text.split("\n")[0].strip()
116
-
117
- # GAIA prefers concise
118
- if len(text.split()) > 12:
119
- # keep last sentence
120
- parts = re.split(r"[.!?]", text)
121
- text = parts[-2].strip() if len(parts) > 1 else parts[0].strip()
122
-
123
- return text
124
-
125
-
126
-
127
-
128
-
129
-
130
 
131
- # =========================
132
- # RUN + SUBMIT
133
- # =========================
134
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
135
 
136
- if not profile:
137
- return "Please login with Hugging Face.", None
138
-
139
- username = profile.username
140
- print(f"👤 User: {username}")
141
-
142
-
143
-
144
- questions_url = f"{DEFAULT_API_URL}/questions"
145
- submit_url = f"{DEFAULT_API_URL}/submit"
146
-
147
- agent = GAIAAgent()
148
 
 
 
149
 
 
 
 
 
 
150
 
 
 
151
 
152
  # Fetch questions
153
- questions = requests.get(questions_url, timeout=15).json()
154
-
155
- answers_payload = []
 
 
 
 
 
 
 
 
 
156
  results_log = []
157
-
158
- for idx, item in enumerate(questions):
159
- task_id = item["task_id"]
160
- question = item["question"]
161
-
162
-
163
-
164
-
165
-
166
- print(f"\n[{idx+1}/{len(questions)}] {task_id}")
167
- answer = agent(question)
168
-
169
- answers_payload.append({
170
- "task_id": task_id,
171
- "submitted_answer": answer
172
- })
173
-
174
-
175
-
176
-
177
- results_log.append({
178
- "Task ID": task_id,
179
- "Answer": answer
180
-
181
- })
182
-
183
- submission = {
184
- "username": username,
185
- "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
186
- "answers": answers_payload
187
- }
188
-
189
- response = requests.post(submit_url, json=submission, timeout=60)
190
- result = response.json()
191
-
192
- status = (
193
- f"🎉 Submission Successful\n\n"
194
- f"Score: {result.get('score')}%\n"
195
- f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}"
196
- )
197
-
198
- return status, pd.DataFrame(results_log)
199
-
200
-
201
-
202
- # =========================
203
- # GRADIO UI
204
- # =========================
205
- with gr.Blocks(title="GAIA Agent") as demo:
206
- gr.Markdown("# 🤗 GAIA Benchmark Agent (Fixed)")
207
-
208
-
209
-
210
-
211
- gr.Markdown(
212
- """
213
- ✅ Chat-only
214
- ✅ nscale-safe
215
- ✅ GAIA-optimized
216
-
217
- **Steps**
218
- 1. Add `HF_TOKEN` to Space secrets
219
- 2. Login with Hugging Face
220
- 3. Click Run
221
- """
222
  )
223
 
224
- gr.LoginButton()
225
- run_btn = gr.Button("🚀 Run Evaluation", variant="primary")
226
-
227
- status = gr.Textbox(label="Status", lines=6)
228
- table = gr.DataFrame(label="Results")
229
-
230
- run_btn.click(run_and_submit_all, outputs=[status, table])
231
-
232
-
233
-
234
  if __name__ == "__main__":
235
- demo.launch(debug=True)
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
  import pandas as pd
 
 
 
5
 
6
  # --- Constants ---
7
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
8
+ HF_TOKEN = os.getenv("HF_TOKEN") # Make sure your HF read token is set in environment variables
9
 
10
+ # --- Gaia Agent using Qwen API ---
11
class GaiaAgentQwen:
    """Minimal GAIA agent that answers questions via the HF Inference API.

    Each call sends the question as a text-generation prompt to the
    configured Qwen model endpoint and returns the generated continuation
    as the answer string. Errors are reported in-band as strings so one
    bad question does not abort a whole evaluation run.
    """

    def __init__(self, model: str = "Qwen/Qwen2.5-Coder-32B-Instruct"):
        self.model = model
        self.api_url = f"https://api-inference.huggingface.co/models/{model}"
        # HF_TOKEN is read from the environment at module import time.
        self.headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        print(f"GaiaAgentQwen initialized with model {model}")

    def __call__(self, question: str) -> str:
        prompt = f"Answer the following question concisely and correctly:\n{question}"
        payload = {
            "inputs": prompt,
            # return_full_text=False is essential: by default the text-generation
            # endpoint echoes the prompt at the start of "generated_text", which
            # would prepend the instruction text to every submitted answer.
            "parameters": {"return_full_text": False},
            # wait_for_model avoids a 503 while the model is cold-loading.
            "options": {"wait_for_model": True},
        }
        try:
            response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=60)
            response.raise_for_status()
            data = response.json()
            # Text-generation responses are a non-empty list of dicts carrying
            # "generated_text"; guard each level before indexing.
            if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
                return data[0]["generated_text"].strip()
            return str(data)  # fallback for unexpected payload shapes
        except Exception as e:
            # Best-effort: surface the failure as the answer rather than raising,
            # so the evaluation loop can continue with the remaining questions.
            print(f"Error calling HF Inference API: {e}")
            return f"API ERROR: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ # --- Main function ---
 
 
34
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, run the agent on each, and submit the answers.

    Args:
        profile: OAuth profile injected by Gradio when the user is logged in,
            or None when anonymous.

    Returns:
        A (status_message, results_dataframe) tuple for the Gradio outputs.
        The dataframe may be None when the run fails before any question is
        answered.
    """
    api_url = DEFAULT_API_URL
    space_id = os.getenv("SPACE_ID") or "unknown-space"

    username = profile.username if profile else "anonymous"
    if profile:
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")

    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Instantiate the Gaia agent
    try:
        agent = GaiaAgentQwen()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code repo: {agent_code}")

    # Fetch questions
    try:
        print(f"Fetching questions from: {questions_url}")
        response = requests.get(questions_url, timeout=10)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Run agent on questions
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question", "")
        if not task_id or not question_text:
            # Skip malformed entries rather than submitting garbage.
            print(f"Skipping invalid question: {item}")
            continue
        try:
            answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
        except Exception as e:
            # Record the failure in the results table but keep going.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # Submit answers
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        print(f"Submitting {len(answers_payload)} answers for user '{username}'...")
        # Grading a full batch server-side can take a while; allow 60 s.
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        submission_result = response.json()
        print(f"Submission result: {submission_result}")
        # Surface the grader's verdict (score, correct count, message) instead
        # of a bare success string, so the user sees their result in the UI.
        final_status = (
            "Submission Successful!\n"
            f"User: {submission_result.get('username', username)}\n"
            f"Overall Score: {submission_result.get('score', 'N/A')}% "
            f"({submission_result.get('correct_count', '?')}/"
            f"{submission_result.get('total_attempted', '?')} correct)\n"
            f"Message: {submission_result.get('message', '')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Error submitting answers: {e}", pd.DataFrame(results_log)
100
+
101
+ # --- Build Gradio Interface ---
102
with gr.Blocks() as demo:
    gr.Markdown("# Gaia Agent Evaluation Runner")
    gr.Markdown("""
    **Instructions:**
    1. Clone this space, then modify the code to define your agent's logic.
    2. Log in to your Hugging Face account using the button below.
    3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see results.

    **Note:** Using the HF API can take a few seconds per question.
    """)
    # The login button only needs to exist in the layout: Gradio injects the
    # logged-in profile into run_and_submit_all via its gr.OAuthProfile type
    # hint, so the button must NOT be wired up as a click input (doing so
    # passes the button's value as an extra positional argument).
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
122
 
 
 
 
 
 
 
 
 
 
 
123
if __name__ == "__main__":
    # Startup banner, then hand control to the Gradio event loop.
    rule = "-" * 30
    print(f"\n{rule} App Starting {rule}")
    print("Launching Gradio Interface for Gaia Agent Evaluation...")
    demo.launch(debug=True, share=False)