victor-johnson committed on
Commit
9826e06
·
verified ·
1 Parent(s): dc2c4fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -88
app.py CHANGED
@@ -10,18 +10,15 @@ import torch
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
- # --- Enhanced BasicAgent (local model version) ---
14
  class BasicAgent:
15
  """
16
  Loads and runs a small LLM *locally* inside the Hugging Face Space
17
- instead of calling the Hugging Face Inference API (which is blocked).
18
  """
19
  def __init__(self):
20
- # βœ… Small model to fit free Spaces β€” change to another instruct model if needed
21
  model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
22
  print(f"πŸš€ Loading model locally: {model_id}")
23
-
24
- # Load tokenizer and model
25
  self.tokenizer = AutoTokenizer.from_pretrained(model_id)
26
  self.model = AutoModelForCausalLM.from_pretrained(
27
  model_id,
@@ -31,73 +28,79 @@ class BasicAgent:
31
  print("βœ… Local model ready.")
32
 
33
  def _clean(self, raw: str) -> str:
34
- """Post-process the model output to a short, exact value."""
35
  txt = raw.strip()
36
- lines = [l.strip() for l in txt.splitlines() if l.strip()]
37
- if lines:
38
- txt = lines[-1]
39
- txt = re.sub(r"^(final answer|answer|prediction)\s*[:\-]\s*", "", txt, flags=re.I)
40
  txt = txt.strip("`'\" \t\n\r")
41
  txt = re.sub(r"[ \t]*[.;,:-]+$", "", txt)
42
  return txt[:200]
43
 
44
  def __call__(self, question: str) -> str:
45
- print(f"🧠 Agent received question: {question[:120]}...")
46
-
47
- # Simple concise prompt
48
  prompt = textwrap.dedent(f"""
49
- You must answer the question with a single, concise value
50
- (number, word, date, or short phrase) and nothing else.
51
-
 
 
 
 
 
52
  Question: {question}
53
- Final answer:
54
  """).strip()
55
-
56
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
57
-
58
- # Generate
59
  with torch.no_grad():
60
  outputs = self.model.generate(
61
  **inputs,
62
- max_new_tokens=50,
63
  temperature=0.7,
64
  do_sample=True,
65
  pad_token_id=self.tokenizer.eos_token_id,
66
  )
67
-
68
  generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
69
  raw_answer = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
70
  clean_ans = self._clean(raw_answer)
71
- print(f"πŸ’‘ Agent raw: '{raw_answer[:80]}' β†’ clean: '{clean_ans}'")
72
  return clean_ans
73
 
74
 
75
- # --- Test Questions & Expected Answers ---
76
- QUESTIONS_AND_ANSWERS = [
77
- {"question": "What is the capital of France?", "expected": "Paris"},
78
- {"question": "What is 5 + 7?", "expected": "12"},
79
- {"question": "In what year did World War II end?", "expected": "1945"},
80
- {"question": "What is the largest planet in our solar system?", "expected": "Jupiter"},
81
- {"question": "Who wrote 'Romeo and Juliet'?", "expected": "Shakespeare"},
82
- ]
 
 
 
 
 
 
 
 
83
 
84
 
85
- # --- Submission Function ---
86
- def submit_answers(answers: list, token: str) -> dict:
87
  """
88
- Submit answers to the scoring API.
 
89
  """
 
 
 
 
 
 
 
90
  try:
91
- space_host = os.getenv("SPACE_HOST", "")
92
- space_id = os.getenv("SPACE_ID", "")
93
-
94
- payload = {
95
- "answers": answers,
96
- "space_host": space_host,
97
- "space_id": space_id,
98
- }
99
-
100
- headers = {"Authorization": f"Bearer {token}"}
101
  resp = requests.post(
102
  f"{DEFAULT_API_URL}/submit",
103
  json=payload,
@@ -105,76 +108,101 @@ def submit_answers(answers: list, token: str) -> dict:
105
  timeout=60,
106
  )
107
  resp.raise_for_status()
 
108
  return resp.json()
109
  except Exception as e:
 
110
  return {"success": False, "message": str(e)}
111
 
112
 
113
- # --- Main Run Function (fixed for HF_TOKEN) ---
114
  def run_and_submit_all(profile: gr.OAuthProfile | None = None, *_):
115
  """
116
- Runs the local agent and submits answers.
117
- Uses HF_TOKEN from environment variables instead of Gradio OAuth token.
 
 
118
  """
119
- # --- Step 1: Detect user login ---
120
- if profile and hasattr(profile, "name"):
121
- print(f"πŸ‘€ Logged in as: {profile.name}")
 
122
  else:
123
- print("⚠️ No OAuth profile detected (this is fine if HF_TOKEN is set).")
 
124
 
125
- # --- Step 2: Get token from environment variable ---
126
  token = os.getenv("HF_TOKEN")
127
  if not token:
128
  return (
129
- "❌ No token found. Please set your Hugging Face token as an environment variable named `HF_TOKEN`.\n"
130
- "In your Space: Settings β†’ Repository secrets β†’ Add new secret β†’ Name: HF_TOKEN, Value: your_token_here",
131
  pd.DataFrame(),
132
  )
133
 
134
- # --- Step 3: Instantiate the agent ---
 
 
 
 
 
 
 
 
 
135
  try:
136
  agent = BasicAgent()
137
  except Exception as e:
138
  return f"❌ Error instantiating agent: {e}", pd.DataFrame()
139
 
140
- # --- Step 4: Collect answers ---
141
  results = []
142
- for qa in QUESTIONS_AND_ANSWERS:
143
- q = qa["question"]
144
- expected = qa["expected"]
145
  try:
146
- answer = agent(q)
147
  except Exception as e:
148
  answer = f"[Error: {e}]"
149
- results.append({"question": q, "answer": answer, "expected": expected})
 
 
 
 
150
 
151
- # --- Step 5: Build DataFrame ---
152
  df = pd.DataFrame(results)
153
 
154
- # --- Step 6: Submit ---
155
- answers_list = [r["answer"] for r in results]
156
- submission_result = submit_answers(answers_list, token)
 
 
 
 
 
 
 
 
157
 
158
- if submission_result.get("success"):
159
- msg = submission_result.get("message", "βœ… Submission successful")
160
- return f"βœ… {msg}", df
161
  else:
162
- msg = submission_result.get("message", "Unknown error")
163
- return f"❌ Submission failed: {msg}", df
164
 
165
 
166
  # --- Gradio Interface ---
167
  with gr.Blocks() as demo:
168
- gr.Markdown("# 🧠 Basic Agent Evaluation Runner")
169
  gr.Markdown(
170
  """
171
- **Instructions:**
172
- 1. Log in to your Hugging Face account.
173
- 2. Add your Hugging Face token as a secret named `HF_TOKEN` in your Space settings.
174
  3. Click **Run Evaluation & Submit All Answers**.
175
 
176
  ---
177
- The agent now runs *locally* inside the Space instead of using the API.
 
 
178
  """
179
  )
180
  login_button = gr.LoginButton()
@@ -189,26 +217,24 @@ with gr.Blocks() as demo:
189
  )
190
 
191
 
192
- # --- Launch ---
193
  if __name__ == "__main__":
194
  print("\n" + "-" * 30 + " App Starting " + "-" * 30)
195
-
196
  space_host = os.getenv("SPACE_HOST")
197
  space_id = os.getenv("SPACE_ID")
198
-
199
  if space_host:
200
- print(f"βœ… SPACE_HOST found: {space_host}")
201
- print(f" Runtime URL should be: https://{space_host}.hf.space")
202
  else:
203
- print("ℹ️ SPACE_HOST not found (running locally?).")
204
-
205
  if space_id:
206
- print(f"βœ… SPACE_ID found: {space_id}")
207
- print(f" Repo URL: https://huggingface.co/spaces/{space_id}")
208
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id}/tree/main")
209
  else:
210
- print("ℹ️ SPACE_ID not found (running locally?).")
211
-
212
- print("-" * (60 + len(" App Starting ")) + "\n")
213
  print("Launching Gradio Interface for Basic Agent Evaluation...")
214
  demo.launch(debug=True, share=False)
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
+ # --- Local LLM Agent ---
14
  class BasicAgent:
15
  """
16
  Loads and runs a small LLM *locally* inside the Hugging Face Space
17
+ instead of calling an external inference API.
18
  """
19
  def __init__(self):
 
20
  model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
21
  print(f"πŸš€ Loading model locally: {model_id}")
 
 
22
  self.tokenizer = AutoTokenizer.from_pretrained(model_id)
23
  self.model = AutoModelForCausalLM.from_pretrained(
24
  model_id,
 
28
  print("βœ… Local model ready.")
29
 
30
  def _clean(self, raw: str) -> str:
31
+ """Cleans raw model output for exact-match submission."""
32
  txt = raw.strip()
33
+ # remove β€œFINAL ANSWER:” or similar
34
+ txt = re.sub(r"(?i)final\s*answer\s*[:\-]*", "", txt)
35
+ txt = re.sub(r"^(answer|prediction)\s*[:\-]*", "", txt, flags=re.I)
 
36
  txt = txt.strip("`'\" \t\n\r")
37
  txt = re.sub(r"[ \t]*[.;,:-]+$", "", txt)
38
  return txt[:200]
39
 
40
def __call__(self, question: str) -> str:
    """Run one question through the local model and return a cleaned answer."""
    print(f"🧠 Question: {question[:100]}...")
    # system-style prompt based on GAIA paper instructions
    prompt = textwrap.dedent(f"""
        You are a general AI assistant. I will ask you a question.
        Report your thoughts, and finish your answer with the following template:
        FINAL ANSWER: [YOUR FINAL ANSWER].
        YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
        If you are asked for a number, don't use commas or units.
        If you are asked for a string, don't use articles or abbreviations.
        If you are asked for a list, follow the above format for each item.

        Question: {question}
    """).strip()

    encoded = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

    # Sampled decoding; seed is not fixed, so output is non-deterministic.
    with torch.no_grad():
        generation = self.model.generate(
            **encoded,
            max_new_tokens=80,
            temperature=0.7,
            do_sample=True,
            pad_token_id=self.tokenizer.eos_token_id,
        )

    # Drop the prompt tokens and decode only the newly generated tail.
    new_token_ids = generation[0][encoded["input_ids"].shape[1]:]
    raw_answer = self.tokenizer.decode(new_token_ids, skip_special_tokens=True)
    clean_ans = self._clean(raw_answer)
    print(f"πŸ’‘ Raw: '{raw_answer[:80]}' β†’ Clean: '{clean_ans}'")
    return clean_ans
70
 
71
 
72
+ # --- Fetch Questions ---
73
def fetch_questions() -> list[dict]:
    """Retrieve the full evaluation question set via GET /questions.

    Returns the decoded JSON list on success, or an empty list when the
    request fails or the payload is not a list.
    """
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=60)
        response.raise_for_status()
        payload = response.json()
        if not isinstance(payload, list):
            print("⚠️ Unexpected response format from /questions.")
            return []
        print(f"βœ… Retrieved {len(payload)} questions from API.")
        return payload
    except Exception as e:
        print(f"❌ Failed to fetch questions: {e}")
        return []
88
 
89
 
90
+ # --- Submit Answers ---
91
def submit_answers(username: str, code_link: str, answers: list[dict], token: str) -> dict:
    """POST the collected answers to /submit for scoring.

    Each entry in *answers* must carry task_id and submitted_answer.
    Returns the decoded API response, or a {"success": False, "message": ...}
    dict on any failure.
    """
    submission = {
        "username": username,
        "agent_code": code_link,
        "answers": answers,
    }
    # Only attach auth when a token was actually supplied.
    auth_headers = {"Authorization": f"Bearer {token}"} if token else {}
    try:
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=submission,
            headers=auth_headers,  # NOTE(review): this kwarg line was an unchanged (elided) diff context line — confirm against the repo
            timeout=60,
        )
        response.raise_for_status()
        print("βœ… Submission successful.")
        return response.json()
    except Exception as e:
        print(f"❌ Submission error: {e}")
        return {"success": False, "message": str(e)}
116
 
117
 
118
+ # --- Main Run Function (using HF_TOKEN) ---
119
def run_and_submit_all(profile: gr.OAuthProfile | None = None, *_):
    """Full pipeline: fetch questions, answer them with the local agent, submit.

    Returns a (status_message, results_dataframe) tuple for the Gradio UI.
    """
    # Prefer the OAuth profile name; fall back to the HF_USERNAME env var.
    username = profile.name if profile and hasattr(profile, "name") else os.getenv("HF_USERNAME", "")
    if username:
        print(f"πŸ‘€ Running as: {username}")
    else:
        print("⚠️ Username not detected β€” please log in via Hugging Face.")
        username = "anonymous"

    # A token is mandatory before any work begins.
    token = os.getenv("HF_TOKEN")
    if not token:
        return (
            "❌ No token found. Please add a secret named `HF_TOKEN` in your Space settings.",
            pd.DataFrame(),
        )

    # Link to this Space's code — the submission endpoint requires it.
    space_id = os.getenv("SPACE_ID", "")
    code_link = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "N/A"

    questions = fetch_questions()
    if not questions:
        return "❌ Could not fetch questions from API.", pd.DataFrame()

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"❌ Error instantiating agent: {e}", pd.DataFrame()

    # Answer every question; record errors in-line so one failure
    # does not abort the whole run.
    results = []
    for entry in questions:
        task_id = entry.get("task_id")
        text = entry.get("question", "")
        try:
            answer = agent(text)
        except Exception as e:
            answer = f"[Error: {e}]"
        results.append(
            {"task_id": task_id, "question": text, "submitted_answer": answer}
        )

    df = pd.DataFrame(results)

    # The API only wants task_id + submitted_answer pairs.
    submission_payload = [
        {"task_id": row["task_id"], "submitted_answer": row["submitted_answer"]}
        for row in results
    ]
    result = submit_answers(username, code_link, submission_payload, token)

    message = result.get("message", "No message returned.")
    score = result.get("score")
    if score is not None:
        # NOTE(review): ':.2%' treats score as a 0-1 fraction; if the API
        # reports 0-100, this display overstates by 100x — confirm.
        message += f" | Score: {score:.2%}"

    if result.get("success", True):
        return f"βœ… Submission successful: {message}", df
    else:
        return f"❌ Submission failed: {message}", df
 
190
 
191
 
192
  # --- Gradio Interface ---
193
  with gr.Blocks() as demo:
194
+ gr.Markdown("# 🧠 Unit 4 Agent Evaluation Runner")
195
  gr.Markdown(
196
  """
197
+ ### Instructions
198
+ 1. Log in with your Hugging Face account.
199
+ 2. Add your HF token as a secret (`HF_TOKEN`) in your Space β†’ Settings β†’ Repository Secrets.
200
  3. Click **Run Evaluation & Submit All Answers**.
201
 
202
  ---
203
+ The agent runs *locally* and uses the official API routes:
204
+ - `GET /questions`
205
+ - `POST /submit`
206
  """
207
  )
208
  login_button = gr.LoginButton()
 
217
  )
218
 
219
 
220
+ # --- Launch App ---
221
# --- Launch App ---
if __name__ == "__main__":
    # Startup banner plus diagnostics about the hosting environment.
    dashes = "-" * 30
    print("\n" + dashes + " App Starting " + dashes)

    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        print(f"βœ… SPACE_HOST: {space_host}")
        print(f" Runtime URL: https://{space_host}.hf.space")
    else:
        print("ℹ️ No SPACE_HOST (running locally?)")

    if space_id:
        print(f"βœ… SPACE_ID: {space_id}")
        print(f" Repo: https://huggingface.co/spaces/{space_id}/tree/main")
    else:
        print("ℹ️ No SPACE_ID (running locally?)")

    print("-" * (60 + len(' App Starting ')) + "\n")
    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)