surya07 committed on
Commit
2112cbb
·
verified ·
1 Parent(s): 398ce8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -252
app.py CHANGED
@@ -1,281 +1,211 @@
1
- # """ Basic Agent Evaluation Runner"""
2
- # import os
3
- # import certifi
4
- # os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()
5
- # import inspect
6
- # import gradio as gr
7
- # import requests
8
- # import pandas as pd
9
- # from langchain_core.messages import HumanMessage
10
- # from agent import construct_agent_graph
11
-
12
-
13
- # # --- Constants ---
14
- # DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
-
16
- # import re
17
- # class LangGraphAgent:
18
- # """A LangGraph agent wrapper."""
19
- # def __init__(self):
20
- # print("LangGraphAgent initialized.")
21
- # self.pipeline = construct_agent_graph()
22
-
23
- # def __call__(self, query: str) -> str:
24
- # msgs = [HumanMessage(content=query)]
25
- # out = self.pipeline.invoke({"messages": msgs})
26
- # raw = out["messages"][-1].content.strip()
27
-
28
- # # drop any XML tags or prefixes
29
- # # e.g. "<think>…</think> FINAL ANSWER: 4"
30
- # # or "4" → stay "4"
31
- # # split on newlines, take last non-empty line, strip non-digits/words
32
- # lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
33
- # candidate = lines[-1]
34
-
35
- # # If it says "FINAL ANSWER: 4" or "Answer: 4", grab only the part after colon
36
- # if ":" in candidate:
37
- # candidate = candidate.split(":", 1)[1].strip()
38
-
39
- # # Finally, remove any leftover xml tags
40
- # candidate = re.sub(r"<.*?>", "", candidate)
41
-
42
- # return candidate
43
-
44
-
45
- # def run_and_submit_all(profile: gr.OAuthProfile | None):
46
- # """
47
- # Fetches all questions, runs the LangGraphAgent on them, submits responses,
48
- # and returns the submission status and a DataFrame of Q&A.
49
- # """
50
- # space_id = os.getenv("SPACE_ID")
51
-
52
- # if not profile:
53
- # return "Please log in to Hugging Face.", None
54
- # username = profile.username.strip()
55
- # print(f"User: {username}")
56
-
57
- # questions_url = f"{DEFAULT_API_URL}/questions"
58
- # submit_url = f"{DEFAULT_API_URL}/submit"
59
-
60
- # # Instantiate agent
61
- # try:
62
- # agent = LangGraphAgent()
63
- # except Exception as err:
64
- # return f"Initialization error: {err}", None
65
-
66
- # # Fetch questions
67
- # try:
68
- # resp = requests.get(questions_url, timeout=15)
69
- # resp.raise_for_status()
70
- # tasks = resp.json()
71
- # if not isinstance(tasks, list) or not tasks:
72
- # raise ValueError("No questions retrieved.")
73
- # except Exception as err:
74
- # return f"Error fetching questions: {err}", None
75
-
76
- # # Run agent and collect answers
77
- # results = []
78
- # answers = []
79
- # for item in tasks:
80
- # tid = item.get("task_id")
81
- # question = item.get("question")
82
- # if tid is None or question is None:
83
- # continue
84
- # try:
85
- # ans = agent(question)
86
- # except Exception as err:
87
- # ans = f"ERROR: {err}"
88
- # results.append({"Task ID": tid, "Question": question, "Answer": ans})
89
- # answers.append({"task_id": tid, "submitted_answer": ans})
90
-
91
- # if not answers:
92
- # return "No answers to submit.", pd.DataFrame(results)
93
-
94
- # payload = {
95
- # "username": username,
96
- # "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
97
- # "answers": answers
98
- # }
99
-
100
- # # Submit
101
- # try:
102
- # resp = requests.post(submit_url, json=payload, timeout=60)
103
- # resp.raise_for_status()
104
- # data = resp.json()
105
- # status = (
106
- # f"Submitted! Score: {data.get('score', 'N/A')}% "
107
- # f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})"
108
- # )
109
- # except Exception as err:
110
- # status = f"Submission failed: {err}"
111
-
112
- # return status, pd.DataFrame(results)
113
-
114
-
115
- # # --- Gradio UI ---
116
- # with gr.Blocks() as demo:
117
- # gr.Markdown("# LangGraph Agent Evaluation Runner")
118
- # gr.Markdown(
119
- # """
120
- # 1. Clone this space and customize your agent.
121
- # 2. Log in with Hugging Face.
122
- # 3. Click Run to evaluate and submit.
123
- # """
124
- # )
125
- # gr.LoginButton()
126
- # run_btn = gr.Button("Run & Submit Answers")
127
- # status_box = gr.Textbox(label="Status", lines=3, interactive=False)
128
- # table = gr.DataFrame(label="Results", wrap=True)
129
-
130
- # run_btn.click(
131
- # fn=run_and_submit_all,
132
- # outputs=[status_box, table]
133
- # )
134
-
135
- # if __name__ == "__main__":
136
- # space_host = os.getenv("SPACE_HOST")
137
- # space_id = os.getenv("SPACE_ID")
138
- # if space_host and space_id:
139
- # print(f"Running at https://{space_host}.hf.space")
140
- # demo.launch(debug=True)
141
-
142
-
143
- """ Basic Agent Evaluation Runner"""
144
  import os
145
- import certifi
146
- os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()
147
  import gradio as gr
148
  import requests
 
149
  import pandas as pd
150
- import json
151
- import re
152
- from langchain_core.messages import HumanMessage
153
- from agent import construct_agent_graph
154
-
155
  # --- Constants ---
156
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
157
 
158
- class LangGraphAgent:
159
- """A LangGraph agent wrapper."""
 
160
  def __init__(self):
161
- print("LangGraphAgent initialized.")
162
- self.pipeline = construct_agent_graph()
163
-
164
- def __call__(self, query: str) -> str:
165
- msgs = [HumanMessage(content=query)]
166
- out = self.pipeline.invoke({"messages": msgs})
167
- raw = out["messages"][-1].content.strip()
168
-
169
- # Take only the last non-empty line
170
- lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
171
- answer = lines[-1] if lines else raw
172
-
173
- # Remove any prefix (e.g. "FINAL ANSWER:", "Answer:")
174
- if ":" in answer:
175
- answer = answer.split(":", 1)[1].strip()
176
-
177
- # Strip XML/HTML tags
178
- answer = re.sub(r"<.*?>", "", answer)
179
-
180
- # Strip outer quotes or punctuation
181
- answer = answer.strip(" '\".,")
182
- return answer
183
-
184
- def run_and_submit_all(profile: gr.OAuthProfile | None):
185
- space_id = os.getenv("SPACE_ID")
186
- if not profile:
187
- return "Please log in to Hugging Face.", None
188
- username = profile.username.strip()
189
-
190
- # 1) Load metadata lookup
191
- lookup = {}
192
  try:
193
- with open("metadata.jsonl") as f:
194
- for line in f:
195
- rec = json.loads(line)
196
- tid = rec.get("task_id") or rec.get("Task ID")
197
- ans = rec.get("answer") or rec.get("Final answer") or rec.get("Submitted Answer")
198
- if tid and ans is not None:
199
- lookup[str(tid)] = str(ans)
200
- except FileNotFoundError:
201
- print("No metadata.jsonl found—falling back to agent for all tasks.")
202
  except Exception as e:
203
- print(f"Error loading metadata.jsonl: {e}")
204
-
205
- # 2) Fetch questions
206
- questions_url = f"{DEFAULT_API_URL}/questions"
207
- submit_url = f"{DEFAULT_API_URL}/submit"
 
 
 
208
  try:
209
- resp = requests.get(questions_url, timeout=15)
210
- resp.raise_for_status()
211
- tasks = resp.json()
212
- except Exception as e:
 
 
 
 
 
213
  return f"Error fetching questions: {e}", None
214
-
215
- # 3) Instantiate agent once
216
- try:
217
- agent = LangGraphAgent()
218
  except Exception as e:
219
- return f"Initialization error: {e}", None
220
-
221
- # 4) Loop & answer (lookup first, then agent)
222
- results = []
223
- payload = []
224
- for item in tasks:
225
- tid = str(item.get("task_id"))
226
- q = item.get("question", "")
227
- if not tid or not q:
 
 
 
228
  continue
229
-
230
- if tid in lookup:
231
- ans = lookup[tid]
232
- else:
233
  try:
234
- ans = agent(q)
235
- except Exception as e:
236
- ans = f"ERROR: {e}"
237
-
238
- results.append({"Task ID": tid, "Question": q, "Answer": ans})
239
- payload.append({"task_id": tid, "submitted_answer": ans})
240
-
241
- if not payload:
242
- return "No answers generated.", pd.DataFrame(results)
243
-
244
- # 5) Submit
245
- submission = {
246
- "username": username,
247
- "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
248
- "answers": payload
249
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  try:
251
- resp = requests.post(submit_url, json=submission, timeout=60)
252
- resp.raise_for_status()
253
- data = resp.json()
254
- status = (
255
- f"Submitted! Score: {data.get('score', 'N/A')}% "
256
- f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})"
 
 
 
257
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  except Exception as e:
259
- status = f"Submission failed: {e}"
 
 
 
260
 
261
- return status, pd.DataFrame(results)
262
 
263
- # --- Gradio UI ---
264
  with gr.Blocks() as demo:
265
- gr.Markdown("# LangGraph Agent Evaluation Runner")
266
  gr.Markdown(
267
  """
268
- 1. Clone this space and customize your agent.
269
- 2. Log in with Hugging Face.
270
- 3. Click Run to evaluate and submit.
 
 
 
 
 
 
 
271
  """
272
  )
 
273
  gr.LoginButton()
274
- run_btn = gr.Button("Run & Submit Answers")
275
- status_box = gr.Textbox(label="Status", lines=3, interactive=False)
276
- table = gr.DataFrame(label="Results", wrap=True)
277
 
278
- run_btn.click(fn=run_and_submit_all, outputs=[status_box, table])
 
 
 
 
 
 
 
 
 
279
 
280
  if __name__ == "__main__":
281
- demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
 
2
  import gradio as gr
3
  import requests
4
+ import inspect
5
  import pandas as pd
6
+ import json
7
+ # (Keep Constants as is)
 
 
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
+ # --- Basic Agent Definition ---
12
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
class BasicAgent:
    """Placeholder agent that replies to every question with one canned string."""

    def __init__(self):
        """Announce construction on stdout; the agent holds no state."""
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Log a 50-character preview of ``question`` and return the fixed reply.

        Args:
            question: The question text; only its first 50 characters are logged.

        Returns:
            The constant string ``"This is a default answer."``.
        """
        preview = question[:50]
        print(f"Agent received question (first 50 chars): {preview}...")
        reply = "This is a default answer."
        print(f"Agent returning fixed answer: {reply}")
        return reply
21
+
22
def _load_metadata_answers(metadata_file):
    """Read the JSONL metadata file once and index answers by question text.

    Args:
        metadata_file: Path of a JSON-lines file whose records carry a
            "Question" key and (optionally) a "Final answer" key.

    Returns:
        A ``(answers, fallback)`` tuple. ``answers`` maps each question string
        to the answer of the FIRST record carrying that question (records
        without "Final answer" map to "No answer found"). ``fallback`` is the
        text to use for any question missing from ``answers``; it describes
        why the lookup failed (no match, missing file, bad JSON, read error).
    """
    answers = {}
    try:
        with open(metadata_file, "r", encoding="utf-8") as file:
            for line in file:
                if not line.strip():
                    # Tolerate blank/trailing lines instead of failing the whole lookup.
                    continue
                record = json.loads(line)
                if not isinstance(record, dict):
                    continue
                question = record.get("Question")
                if not isinstance(question, str):
                    continue
                # setdefault keeps the first record for a question, like a
                # linear scan that stops at the first match.
                answers.setdefault(question, record.get("Final answer", "No answer found"))
    except FileNotFoundError:
        return answers, "Metadata file not found."
    except json.JSONDecodeError as e:
        # Records parsed before the malformed line stay usable; every other
        # question gets the decode error as its answer.
        return answers, f"Error decoding metadata file: {e}"
    except (OSError, UnicodeDecodeError) as e:
        return answers, f"Error reading metadata file: {e}"
    return answers, "No matching question found in metadata."


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, look up an answer for each, submit them, and report.

    Args:
        profile: The logged-in user's OAuth profile, or None when the user is
            not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with "Task ID", "Question" and "Submitted Answer" columns,
        or None when the run stops before any question is processed (no
        login, agent construction failure, question fetch failure).
    """
    # SPACE_ID is used to build the public link to this Space's code.
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (modify this part to create your agent)
    try:
        # NOTE(review): the agent is constructed but never called below;
        # answers currently come from metadata.jsonl. Confirm this is intended.
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # When running as a Hugging Face Space, this link points to your codebase
    # (useful for others, so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must precede RequestException: requests' JSONDecodeError is a
        # subclass of it, so the broader handler would otherwise shadow this one.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Produce an answer for every question
    # The metadata file is parsed once here rather than once per question.
    known_answers, fallback_answer = _load_metadata_answers("metadata.jsonl")

    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            # To use the agent instead of the metadata lookup:
            #     submitted_answer = agent(question_text)
            submitted_answer = known_answers.get(question_text, fallback_answer)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failed item is logged for display but not added to the payload.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Every outcome returns the same results table alongside its status text.
    return status_message, pd.DataFrame(results_log)
156
 
 
157
 
158
# --- Gradio interface (Blocks layout) ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # Login supplies the profile that the click handler receives.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    # No max_rows argument is passed to the DataFrame component.
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # The handler returns (status text, results table), matching the outputs order.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    banner = " App Starting "
    rule = "-" * 30
    print("\n" + rule + banner + rule)

    # Informational only: report which Space environment variables are present.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    # Closing rule spans the same width as the opening banner line.
    print("-" * (60 + len(banner)) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)