victor-johnson committed on
Commit
08d12df
·
verified ·
1 Parent(s): 5695f43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -152
app.py CHANGED
@@ -1,213 +1,243 @@
1
  import os
2
- import re
3
- import json
4
- import textwrap
5
  import requests
 
6
  import pandas as pd
 
 
7
  import torch
8
- import gradio as gr
 
9
  from transformers import AutoTokenizer, AutoModelForCausalLM
10
 
11
- # ---------------- Constants ----------------
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
-
15
- # ---------------- Agent Definition ----------------
16
class BasicAgent:
    """
    Local small LLM answering strictly in the GAIA Unit 4 format:
    Uses the official system prompt and outputs `FINAL ANSWER: <answer>`.
    """

    def __init__(self):
        # Small instruct model that fits on CPU or a single consumer GPU.
        model_id = "microsoft/Phi-3-mini-4k-instruct"
        print(f"🚀 Loading model locally: {model_id}")
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        # fp16 only when CUDA is available; fp32 is the safe CPU default.
        dtype = torch.float16 if torch.cuda.is_available() else torch.float32
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map="auto",
        )
        print("✅ Local model ready.")

    def _clean_final_answer(self, text: str) -> tuple[str, str]:
        """
        Extract the 'FINAL ANSWER' portion of *text* and return it together
        with the full reasoning trace (the stripped raw output).
        """
        reasoning_trace = text.strip()
        match = re.search(r"FINAL ANSWER\s*[:\-]?\s*(.*)", text, re.IGNORECASE | re.DOTALL)
        # Fall back to the whole text when the model omitted the marker.
        candidate = match.group(1) if match else text
        # Basic normalization (quotes/whitespace/trailing dots) for exact-match scoring.
        return candidate.strip("`'\" \t\n\r. "), reasoning_trace

    def __call__(self, question: str) -> dict:
        """Run the model on one question; return answer plus reasoning trace."""
        print(f"🧠 Processing question: {question[:80]}...")
        system_prompt = textwrap.dedent(f"""
            You are a general AI assistant. I will ask you a question.
            Report your thoughts, and finish your answer with the following template:
            FINAL ANSWER: [YOUR FINAL ANSWER].

            YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
            If you are asked for a number, don't use commas or units.
            If you are asked for a string, don't use articles or abbreviations.
            If you are asked for a list, apply the above format for each element.

            Question: {question}
        """).strip()

        inputs = self.tokenizer(system_prompt, return_tensors="pt").to(self.model.device)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=100,
                temperature=0.0,  # deterministic
                do_sample=False,  # no randomness
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # Drop the prompt tokens; decode only the newly generated tail.
        generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
        raw_output = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
        model_answer, reasoning = self._clean_final_answer(raw_output)
        print(f"💡 Raw: '{raw_output[:80]}' → Final answer: '{model_answer}'")
        return {"model_answer": model_answer, "reasoning_trace": reasoning}
 
 
77
 
78
 
79
- # ---------------- API Utilities ----------------
80
def fetch_questions() -> list[dict]:
    """Fetch all evaluation questions from the API; return [] on any failure."""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=60)
        resp.raise_for_status()
        data = resp.json()
        if isinstance(data, list):
            print(f"✅ Retrieved {len(data)} questions from API.")
            return data
    except Exception as e:
        # Best-effort: log and fall through to the empty-list fallback.
        print(f"❌ Error fetching questions: {e}")
    # Non-list payloads and errors both yield an empty question set.
    return []
92
 
 
 
 
 
 
 
93
 
94
def submit_answers(username: str, code_link: str, answers: list[dict], token: str) -> dict:
    """Submit answers in GAIA JSON-lines format; return the API's JSON reply."""
    # Reduce each result to the two fields the scoring API expects.
    formatted = [
        {"task_id": a["task_id"], "submitted_answer": a["model_answer"]}
        for a in answers
    ]
    payload = {
        "username": username,
        "agent_code": code_link,
        "answers": formatted,
    }
    # Only attach the Authorization header when a token was supplied.
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    try:
        resp = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, headers=headers, timeout=90)
        resp.raise_for_status()
        print("✅ Submission successful.")
        return resp.json()
    except Exception as e:
        print(f" Submission failed: {e}")
        return {"success": False, "message": str(e)}
111
-
112
-
113
- # ---------------- Main Pipeline ----------------
114
def run_and_submit_all(profile: gr.OAuthProfile | None = None, *_):
    """Fetch all tasks, run the local agent on each, submit, and report results."""
    # 1️⃣ Identify username: OAuth profile first, env var second, anonymous last.
    username = profile.name if profile and hasattr(profile, "name") else os.getenv("HF_USERNAME", "anonymous")
    print(f"👤 Submitting as: {username}")

    # 2️⃣ Load token from env secret; bail out early if absent.
    token = os.getenv("HF_TOKEN")
    if not token:
        return (
            " Missing Hugging Face token. Add a secret named `HF_TOKEN` in your Space → Settings → Repository secrets.",
            pd.DataFrame(),
        )

    # 3️⃣ Prepare agent_code link (as required by API)
    space_id = os.getenv("SPACE_ID", "")
    code_link = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "unknown"

    # 4️⃣ Fetch questions
    questions = fetch_questions()
    if not questions:
        return "❌ Could not fetch questions from API.", pd.DataFrame()

    # 5️⃣ Run model locally (model loading itself may fail, e.g. OOM)
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"❌ Model initialization error: {e}", pd.DataFrame()

    results = []
    for q in questions:
        task_id = q.get("task_id")
        text = q.get("question", "")
        # A failure on one task must not abort the whole run.
        try:
            res = agent(text)
        except Exception as e:
            res = {"model_answer": f"[Error: {e}]", "reasoning_trace": ""}
        res["task_id"] = task_id
        results.append(res)

    # 6️⃣ Build display DataFrame
    df = pd.DataFrame(results)

    # 7️⃣ Submit and compose the status message.
    submission_result = submit_answers(username, code_link, results, token)
    msg = submission_result.get("message", "No message returned.")
    score = submission_result.get("score")
    if score is not None:
        msg += f" | Score: {score:.2%}"

    if submission_result.get("success", True):
        return f"✅ Submission successful: {msg}", df
    return f" Submission failed: {msg}", df
 
 
 
167
 
168
 
169
- # ---------------- Gradio UI ----------------
170
# Gradio UI: login button + one-click run/submit, with status text and a results table.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Unit 4 Agent Evaluation")
    gr.Markdown(
        """
        ### Instructions
        1. Log in with your Hugging Face account.
        2. Add your **HF_TOKEN** as a secret in Space Settings Repository Secrets.
        3. Click **Run Evaluation & Submit All Answers**.

        ---
        This app:
        - Fetches all tasks via `GET /questions`
        - Runs your local agent using GAIA’s official prompt
        - Submits answers to `POST /submit` in the required JSON format
        """
    )
    login_btn = gr.LoginButton()
    run_btn = gr.Button("🚀 Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    result_table = gr.DataFrame(label="Questions and Model Answers", wrap=True)

    # Passing the login button as input lets Gradio inject the OAuth profile.
    run_btn.click(fn=run_and_submit_all, inputs=[login_btn], outputs=[status_box, result_table])
192
 
 
193
 
194
- # ---------------- Launch ----------------
195
# Script entry point: print runtime diagnostics, then launch the Gradio app.
if __name__ == "__main__":
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    # SPACE_HOST / SPACE_ID are only set when running inside an HF Space.
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        print(f" SPACE_HOST: {space_host}")
        print(f" Runtime: https://{space_host}.hf.space")
    else:
        print("ℹ️ SPACE_HOST not found (running locally?)")

    if space_id:
        print(f"✅ SPACE_ID: {space_id}")
        print(f" Repo: https://huggingface.co/spaces/{space_id}/tree/main")
    else:
        print("ℹ️ SPACE_ID not found (running locally?)")

    print("------------------------------------------------------------\n")
    demo.launch(debug=True, share=False)
 
1
  import os
2
+ import gradio as gr
 
 
3
  import requests
4
+ import inspect
5
  import pandas as pd
6
+
7
+ # 👇 NEW imports (added only what’s needed)
8
  import torch
9
+ import re
10
+ import textwrap
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
 
13
+ # (Keep Constants as is)
14
+ # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
+ # --- Basic Agent Definition ---
18
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
19
class BasicAgent:
    """
    Local Phi-3-mini agent for the GAIA Unit 4 course evaluation.

    Prompts the model to end with `FINAL ANSWER: ...` and returns only the
    extracted, normalized answer string.
    """

    def __init__(self):
        # ✅ Load a small, CPU-friendly model
        model_id = "microsoft/Phi-3-mini-4k-instruct"
        print(f"🚀 Loading model locally: {model_id}")
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float32,  # safe for CPU
            device_map="auto",
        )
        print("✅ Model ready.")

    def _extract_final_answer(self, text: str) -> str:
        """Extracts text after 'FINAL ANSWER:' if present; else the stripped text."""
        match = re.search(r"FINAL ANSWER\s*[:\-]?\s*(.*)", text, re.IGNORECASE | re.DOTALL)
        if match:
            # Normalize quotes/backticks/whitespace/trailing dots for exact-match scoring.
            return match.group(1).strip("`'\" \t\n\r.")
        return text.strip()

    def __call__(self, question: str) -> str:
        """Generate an answer for *question* and return the cleaned final answer."""
        print(f"🧠 Agent received question (first 50 chars): {question[:50]}...")

        # Instruction to produce a clean final answer
        system_prompt = textwrap.dedent(f"""
            You are a helpful AI assistant.
            Think step by step and end with:
            FINAL ANSWER: <your concise answer>

            Question: {question}
        """).strip()

        # Tokenize and generate
        inputs = self.tokenizer(system_prompt, return_tensors="pt").to(self.model.device)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=128,
                # FIX: dropped `temperature=0.0` — it is ignored when
                # do_sample=False and makes transformers emit a sampling-flag
                # warning (strict generation-config validation rejects it).
                # Greedy decoding below is already deterministic.
                do_sample=False,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens (skip the echoed prompt).
        generated = self.tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
        )
        final_answer = self._extract_final_answer(generated)
        print(f"💡 Model raw: '{generated[:80]}' → Final answer: '{final_answer}'")
        return final_answer
69
 
70
 
71
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

    # Require a logged-in user: the username keys the leaderboard submission.
    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions — each failure mode gets its own user-facing message.
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent — one failed task must not abort the whole batch.
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        # Surface as much server-provided detail as possible.
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
190
 
191
 
192
+ # --- Build Gradio Interface using Blocks ---
193
# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # The login button supplies the gr.OAuthProfile consumed by run_and_submit_all.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
220
 
221
# Script entry point: log Space diagnostics, then start the Gradio server.
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)