Karim0111 committed on
Commit
eab6fbb
Β·
verified Β·
1 Parent(s): a17734f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -141
app.py CHANGED
@@ -4,196 +4,171 @@ import requests
4
  import pandas as pd
5
  import re
6
  from huggingface_hub import InferenceClient
 
7
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
-
12
- # =========================
13
- # GAIA OPTIMIZED AGENT
14
- # =========================
15
- class GAIAAgent:
16
  """
17
- GAIA benchmark agent – chat-only, nscale-safe, exact answers.
 
18
  """
19
 
20
  def __init__(self):
21
- print("πŸš€ GAIAAgent initializing...")
22
-
23
- hf_token = (
24
- os.getenv("HF_TOKEN")
25
- or os.getenv("HUGGING_FACE_HUB_TOKEN")
26
- or os.getenv("HF_API_TOKEN")
27
- )
28
-
29
  if not hf_token:
30
- raise RuntimeError("HF_TOKEN not found in Space secrets")
 
 
 
31
 
32
  self.client = InferenceClient(token=hf_token)
33
-
34
- # βœ… SAFE MODELS (chat-only)
35
- self.model = "meta-llama/Meta-Llama-3-8B-Instruct"
36
- # Alternative:
37
- # self.model = "Qwen/Qwen2.5-7B-Instruct"
38
-
39
  print(f"βœ… Model loaded: {self.model}")
40
 
41
  def __call__(self, question: str) -> str:
42
- print(f"\nQ: {question[:120]}")
 
 
 
 
 
 
 
 
 
43
 
44
  try:
45
- answer = self._chat_answer(question)
46
- print(f"A: {answer}")
47
  return answer
48
  except Exception as e:
49
  print(f"❌ Agent error: {e}")
50
- return "Unable to determine answer"
51
-
52
- def _chat_answer(self, question: str) -> str:
53
- messages = [
54
- {
55
- "role": "system",
56
- "content": (
57
- "You are an expert GAIA benchmark solver.\n"
58
- "Answer EXACTLY what is asked.\n"
59
- "Return ONLY the final answer.\n"
60
- "No explanations, no prefixes, no formatting."
61
- )
62
- },
63
- {
64
- "role": "user",
65
- "content": question
66
- }
67
- ]
68
-
69
- response = self.client.chat_completion(
70
  model=self.model,
71
- messages=messages,
72
- max_tokens=256,
73
- temperature=0.0,
 
 
74
  )
75
-
76
- if not response or not response.choices:
77
- return "Unable to determine answer"
78
-
79
- raw = response.choices[0].message.content.strip()
80
- return self._clean_answer(raw)
81
 
82
  def _clean_answer(self, text: str) -> str:
83
  """
84
- GAIA-safe cleaning: minimal, no hallucinated trimming.
85
  """
86
- text = text.strip()
87
-
88
- # Remove common junk if model disobeys
89
- bad_prefixes = [
90
- "answer:",
91
- "final answer:",
92
- "the answer is",
93
- "result:"
94
- ]
95
-
96
- for p in bad_prefixes:
97
- if text.lower().startswith(p):
98
- text = text[len(p):].strip()
99
-
100
- # If multi-line, keep first meaningful line
101
- if "\n" in text:
102
- text = text.split("\n")[0].strip()
103
-
104
- # GAIA prefers concise
105
- if len(text.split()) > 12:
106
- # keep last sentence
107
- parts = re.split(r"[.!?]", text)
108
- text = parts[-2].strip() if len(parts) > 1 else parts[0].strip()
109
 
110
- return text
 
 
 
 
 
111
 
112
 
113
- # =========================
114
- # RUN + SUBMIT
115
- # =========================
116
  def run_and_submit_all(profile: gr.OAuthProfile | None):
117
-
118
- if not profile:
119
- return "Please login with Hugging Face.", None
120
-
121
- username = profile.username
122
- print(f"πŸ‘€ User: {username}")
 
 
123
 
124
  questions_url = f"{DEFAULT_API_URL}/questions"
125
  submit_url = f"{DEFAULT_API_URL}/submit"
126
 
127
- agent = GAIAAgent()
 
 
 
128
 
129
- # Fetch questions
130
- questions = requests.get(questions_url, timeout=15).json()
131
-
132
- answers_payload = []
133
- results_log = []
134
 
135
- for idx, item in enumerate(questions):
136
- task_id = item["task_id"]
137
- question = item["question"]
 
 
 
 
138
 
139
- print(f"\n[{idx+1}/{len(questions)}] {task_id}")
140
- answer = agent(question)
141
 
142
- answers_payload.append({
143
- "task_id": task_id,
144
- "submitted_answer": answer
145
- })
 
146
 
 
 
147
  results_log.append({
148
  "Task ID": task_id,
149
- "Answer": answer
 
150
  })
151
 
152
- submission = {
153
- "username": username,
154
- "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
155
- "answers": answers_payload
156
- }
157
-
158
- response = requests.post(submit_url, json=submission, timeout=60)
159
- result = response.json()
160
-
161
- status = (
162
- f"πŸŽ‰ Submission Successful\n\n"
163
- f"Score: {result.get('score')}%\n"
164
- f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}"
165
- )
166
-
167
- return status, pd.DataFrame(results_log)
168
-
 
169
 
170
- # =========================
171
- # GRADIO UI
172
- # =========================
173
- with gr.Blocks(title="GAIA Agent") as demo:
174
- gr.Markdown("# πŸ€— GAIA Benchmark Agent (Fixed)")
175
 
 
 
 
176
  gr.Markdown(
177
  """
178
- βœ… Chat-only
179
- βœ… nscale-safe
180
- βœ… GAIA-optimized
181
-
182
- **Steps**
183
- 1. Add `HF_TOKEN` to Space secrets
184
- 2. Login with Hugging Face
185
- 3. Click Run
186
- """
187
  )
188
 
189
  gr.LoginButton()
190
- run_btn = gr.Button("πŸš€ Run Evaluation", variant="primary")
191
-
192
- status = gr.Textbox(label="Status", lines=6)
193
- table = gr.DataFrame(label="Results")
194
-
195
- run_btn.click(run_and_submit_all, outputs=[status, table])
196
 
 
197
 
198
  if __name__ == "__main__":
199
- demo.launch(debug=True)
 
4
  import pandas as pd
5
  import re
6
  from huggingface_hub import InferenceClient
7
+ import time
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
+ # --- Free GAIA Agent Definition ---
13
class EnhancedGAIAAgentFree:
    """GAIA benchmark agent backed by a free Hugging Face Inference model.

    Text-only: questions that mention attachments or other media are
    answered "unknown" instead of being sent to the model.
    """

    # Keywords marking questions this text-only agent cannot handle
    # (images, attached files, spreadsheets, ...).
    _UNSUPPORTED_KEYWORDS = ("image", "video", "file", "attached", "excel", "code")

    def __init__(self):
        """Read the HF token from the environment and build the client.

        If no token is found, ``client``/``model`` stay ``None`` and every
        call returns an error string instead of raising.
        """
        print("🚀 GAIAAgent initializing... (FREE version)")

        hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
        if not hf_token:
            print("⚠️ HF_TOKEN not found! Add it to Space secrets.")
            self.client = None
            self.model = None
            return

        self.client = InferenceClient(token=hf_token)
        self.model = "TheBloke/guanaco-7B-GPTQ"  # free HF model
        print(f"✅ Model loaded: {self.model}")

    def _is_unsupported(self, question: str) -> bool:
        """Return True if *question* mentions media/files this agent can't handle.

        BUGFIX: the original used plain substring matching, so "profile"
        triggered the "file" keyword and "encoded" triggered "code".
        Match whole words only.
        """
        lowered = question.lower()
        return any(
            re.search(rf"\b{re.escape(word)}\b", lowered) is not None
            for word in self._UNSUPPORTED_KEYWORDS
        )

    def __call__(self, question: str) -> str:
        """Answer *question* with the free LLM; returns "unknown" on failure."""
        print(f"\nQ: {question[:150]}...")
        if not self.client or not self.model:
            return "ERROR: HF_TOKEN not configured"

        # Skip questions that need multi-modal / file capabilities.
        if self._is_unsupported(question):
            return "unknown"

        try:
            answer = self._generate_answer(question)
            print(f"A: {answer[:150]}...")
            return answer
        except Exception as e:
            # Best-effort agent: any inference failure degrades to "unknown".
            print(f"❌ Agent error: {e}")
            return "unknown"

    def _generate_answer(self, question: str) -> str:
        """Run text-generation with a terse answer-only prompt and clean the result."""
        prompt = f"""
You are an expert for GAIA benchmark.
Answer concisely. ONLY provide the final answer.
If you cannot determine the answer, write "unknown".
Question: {question}
FINAL ANSWER:"""

        # NOTE(review): temperature is ignored when do_sample=False (greedy
        # decoding); kept as-is to preserve the original call.
        response = self.client.text_generation(
            model=self.model,
            prompt=prompt,
            max_new_tokens=128,
            temperature=0.1,
            do_sample=False,
            return_full_text=False,
        )
        return self._clean_answer(response)

    def _clean_answer(self, text: str) -> str:
        """Strip boilerplate prefixes; fall back to "unknown" when empty."""
        if not text:
            return "unknown"

        # Remove prefixes the model sometimes prepends despite instructions.
        prefixes = ["Answer:", "The answer is", "A:", "FINAL ANSWER:", "Result:"]
        for p in prefixes:
            if text.lower().startswith(p.lower()):
                text = text[len(p):].strip()
        return text.strip() if text.strip() else "unknown"
87
 
88
 
 
 
 
89
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Evaluate every GAIA question with the agent and submit the answers.

    Returns a (status message, results DataFrame) pair for the Gradio UI.
    """
    if not profile:
        return "Please login to HuggingFace.", None
    username = profile.username
    space_id = os.getenv("SPACE_ID")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # Build the agent; bail out early when no token was configured.
    agent = EnhancedGAIAAgentFree()
    if not agent.client or not agent.model:
        return "⚠️ HF_TOKEN not found! Add it to Space secrets.", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Download the question set from the scoring service.
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    log_rows = []
    payload = []

    def _shorten(s: str) -> str:
        # Truncate long strings for the on-screen results table.
        return s[:80] + "..." if len(s) > 80 else s

    for item in questions:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue

        answer = agent(question_text)
        payload.append({"task_id": task_id, "submitted_answer": answer})
        log_rows.append({
            "Task ID": task_id,
            "Question": _shorten(question_text),
            "Answer": _shorten(answer),
        })

    if not payload:
        return "No answers generated.", pd.DataFrame(log_rows)

    # Post the collected answers for scoring.
    submission = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        resp = requests.post(submit_url, json=submission, timeout=60)
        resp.raise_for_status()
        scored = resp.json()
        status = (
            f"🎉 Submission Successful!\n"
            f"User: {scored.get('username')}\n"
            f"Score: {scored.get('score', 'N/A')}% "
            f"({scored.get('correct_count', '?')}/{scored.get('total_attempted', '?')} correct)\n"
        )
        return status, pd.DataFrame(log_rows)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(log_rows)
152
 
 
 
 
 
 
153
 
154
# --- Gradio Interface ---
with gr.Blocks(title="GAIA Agent Evaluation (Free)") as demo:
    # App title shown at the top of the Space.
    gr.Markdown("# 🤗 GAIA Benchmark Agent (Free)")
    # Setup instructions for first-time users (token + login).
    gr.Markdown(
        """
        **Setup Required:**
        1. Add HF_TOKEN to Space secrets (Settings → Variables and secrets)
        2. Get free token at: https://huggingface.co/settings/tokens (Read access)
        3. Login with HuggingFace, then click Run Evaluation.
        """
    )

    # OAuth login — run_and_submit_all reads the profile for the username.
    gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation", variant="primary", size="lg")
    status_output = gr.Textbox(label="Status", lines=8, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)

    # No explicit inputs: Gradio injects the OAuth profile argument itself.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    # debug=True surfaces tracebacks in the Space logs; no public share link.
    demo.launch(debug=True, share=False)