surya07 committed on
Commit
94d2cb1
·
verified ·
1 Parent(s): a0be6a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +249 -177
app.py CHANGED
@@ -1,209 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """ Basic Agent Evaluation Runner"""
2
  import os
3
- import inspect
 
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
 
 
7
  from langchain_core.messages import HumanMessage
8
- from agent import build_graph
9
 
10
-
11
-
12
- # (Keep Constants as is)
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
- # --- Basic Agent Definition ---
17
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 
 
 
 
 
 
 
 
18
 
 
 
 
19
 
20
class BasicAgent:
    """Callable wrapper around the graph returned by ``build_graph``.

    Calling an instance with a question string runs the graph once and
    returns the ``content`` of the last message in the resulting state.
    """

    def __init__(self):
        print("BasicAgent initialized.")
        self.graph = build_graph()

    def __call__(self, question: str) -> str:
        """Run the graph on ``question`` and return the final message content."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        initial_state = {"messages": [HumanMessage(content=question)]}
        final_message = self.graph.invoke(initial_state)["messages"][-1]
        # The content is returned untouched; slicing off a fixed prefix
        # ([14:]) is no longer necessary.
        return final_message.content
32
-
33
-
34
-
35
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run ``BasicAgent`` on them, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per processed question, or ``None``
        when the run stops before any question is processed.
    """
    # SPACE_ID is only used to build the public link to this Space's code.
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate the agent.
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # NOTE(review): when SPACE_ID is unset this link contains the literal
    # text "None"; confirm whether the scoring API tolerates that.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch the questions.
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: requests' JSONDecodeError is a
        # RequestException subclass, so the reverse order makes this branch
        # unreachable.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run the agent on every question.
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failed task is logged for display but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare the submission.
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit. Every branch sets status_message; the table is built once below.
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    return status_message, pd.DataFrame(results_log)
154
 
 
155
 
156
# --- Build Gradio Interface using Blocks ---
# Components are created in display order inside the Blocks context:
# title, instructions, login button, run button, status box, results table.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # The two values returned by run_and_submit_all fill the status box and
    # the results table. No `inputs` are wired here; presumably Gradio
    # supplies the handler's `profile` argument from the login state based on
    # its gr.OAuthProfile annotation - confirm against the Gradio docs.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup

    # These checks only print diagnostics; neither variable changes how the
    # app is launched below.
    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup: # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule, sized to match the opening banner's width.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
 
1
+ # """ Basic Agent Evaluation Runner"""
2
+ # import os
3
+ # import certifi
4
+ # os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()
5
+ # import inspect
6
+ # import gradio as gr
7
+ # import requests
8
+ # import pandas as pd
9
+ # from langchain_core.messages import HumanMessage
10
+ # from agent import construct_agent_graph
11
+
12
+
13
+ # # --- Constants ---
14
+ # DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
+
16
+ # import re
17
+ # class LangGraphAgent:
18
+ # """A LangGraph agent wrapper."""
19
+ # def __init__(self):
20
+ # print("LangGraphAgent initialized.")
21
+ # self.pipeline = construct_agent_graph()
22
+
23
+ # def __call__(self, query: str) -> str:
24
+ # msgs = [HumanMessage(content=query)]
25
+ # out = self.pipeline.invoke({"messages": msgs})
26
+ # raw = out["messages"][-1].content.strip()
27
+
28
+ # # drop any XML tags or prefixes
29
+ # # e.g. "<think>…</think> FINAL ANSWER: 4"
30
+ # # or "4" → stay "4"
31
+ # # split on newlines, take last non-empty line, strip non-digits/words
32
+ # lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
33
+ # candidate = lines[-1]
34
+
35
+ # # If it says "FINAL ANSWER: 4" or "Answer: 4", grab only the part after colon
36
+ # if ":" in candidate:
37
+ # candidate = candidate.split(":", 1)[1].strip()
38
+
39
+ # # Finally, remove any leftover xml tags
40
+ # candidate = re.sub(r"<.*?>", "", candidate)
41
+
42
+ # return candidate
43
+
44
+
45
+ # def run_and_submit_all(profile: gr.OAuthProfile | None):
46
+ # """
47
+ # Fetches all questions, runs the LangGraphAgent on them, submits responses,
48
+ # and returns the submission status and a DataFrame of Q&A.
49
+ # """
50
+ # space_id = os.getenv("SPACE_ID")
51
+
52
+ # if not profile:
53
+ # return "Please log in to Hugging Face.", None
54
+ # username = profile.username.strip()
55
+ # print(f"User: {username}")
56
+
57
+ # questions_url = f"{DEFAULT_API_URL}/questions"
58
+ # submit_url = f"{DEFAULT_API_URL}/submit"
59
+
60
+ # # Instantiate agent
61
+ # try:
62
+ # agent = LangGraphAgent()
63
+ # except Exception as err:
64
+ # return f"Initialization error: {err}", None
65
+
66
+ # # Fetch questions
67
+ # try:
68
+ # resp = requests.get(questions_url, timeout=15)
69
+ # resp.raise_for_status()
70
+ # tasks = resp.json()
71
+ # if not isinstance(tasks, list) or not tasks:
72
+ # raise ValueError("No questions retrieved.")
73
+ # except Exception as err:
74
+ # return f"Error fetching questions: {err}", None
75
+
76
+ # # Run agent and collect answers
77
+ # results = []
78
+ # answers = []
79
+ # for item in tasks:
80
+ # tid = item.get("task_id")
81
+ # question = item.get("question")
82
+ # if tid is None or question is None:
83
+ # continue
84
+ # try:
85
+ # ans = agent(question)
86
+ # except Exception as err:
87
+ # ans = f"ERROR: {err}"
88
+ # results.append({"Task ID": tid, "Question": question, "Answer": ans})
89
+ # answers.append({"task_id": tid, "submitted_answer": ans})
90
+
91
+ # if not answers:
92
+ # return "No answers to submit.", pd.DataFrame(results)
93
+
94
+ # payload = {
95
+ # "username": username,
96
+ # "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
97
+ # "answers": answers
98
+ # }
99
+
100
+ # # Submit
101
+ # try:
102
+ # resp = requests.post(submit_url, json=payload, timeout=60)
103
+ # resp.raise_for_status()
104
+ # data = resp.json()
105
+ # status = (
106
+ # f"Submitted! Score: {data.get('score', 'N/A')}% "
107
+ # f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})"
108
+ # )
109
+ # except Exception as err:
110
+ # status = f"Submission failed: {err}"
111
+
112
+ # return status, pd.DataFrame(results)
113
+
114
+
115
+ # # --- Gradio UI ---
116
+ # with gr.Blocks() as demo:
117
+ # gr.Markdown("# LangGraph Agent Evaluation Runner")
118
+ # gr.Markdown(
119
+ # """
120
+ # 1. Clone this space and customize your agent.
121
+ # 2. Log in with Hugging Face.
122
+ # 3. Click Run to evaluate and submit.
123
+ # """
124
+ # )
125
+ # gr.LoginButton()
126
+ # run_btn = gr.Button("Run & Submit Answers")
127
+ # status_box = gr.Textbox(label="Status", lines=3, interactive=False)
128
+ # table = gr.DataFrame(label="Results", wrap=True)
129
+
130
+ # run_btn.click(
131
+ # fn=run_and_submit_all,
132
+ # outputs=[status_box, table]
133
+ # )
134
+
135
+ # if __name__ == "__main__":
136
+ # space_host = os.getenv("SPACE_HOST")
137
+ # space_id = os.getenv("SPACE_ID")
138
+ # if space_host and space_id:
139
+ # print(f"Running at https://{space_host}.hf.space")
140
+ # demo.launch(debug=True)
141
+
142
+
143
  """ Basic Agent Evaluation Runner"""
144
  import os
145
+ import certifi
146
+ os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()
147
  import gradio as gr
148
  import requests
149
  import pandas as pd
150
+ import json
151
+ import re
152
  from langchain_core.messages import HumanMessage
153
+ from agent import construct_agent_graph
154
 
 
 
 
155
  # --- Constants ---
156
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
157
 
158
class LangGraphAgent:
    """Callable wrapper around the graph built by ``construct_agent_graph``.

    Calling an instance with a question runs the graph once and returns a
    cleaned-up, single-line answer extracted from the last message.
    """

    # Known answer labels such as "FINAL ANSWER:" or "Answer:". Only these
    # labels are removed, so a colon that belongs to the answer itself
    # ("10:30 PM", "https://...") is left alone.
    _ANSWER_LABEL = re.compile(r"\b(?:final\s+answer|answer)\s*:\s*", re.IGNORECASE)
    # Any XML/HTML-style tag, e.g. "<think>" or "</think>".
    _TAG = re.compile(r"<.*?>")

    def __init__(self):
        print("LangGraphAgent initialized.")
        self.pipeline = construct_agent_graph()

    def __call__(self, query: str) -> str:
        """Run the graph on ``query`` and return the extracted answer."""
        msgs = [HumanMessage(content=query)]
        out = self.pipeline.invoke({"messages": msgs})
        return self._extract_answer(out["messages"][-1].content)

    @staticmethod
    def _extract_answer(raw: str) -> str:
        """Reduce raw model output to a bare answer string.

        Steps: take the last non-empty line, strip tags, drop everything up
        to and including the first known answer label, then trim surrounding
        spaces, quotes, periods and commas.

        Args:
            raw: The text content of the final message.

        Returns:
            The cleaned answer; an empty string when ``raw`` is blank.
        """
        raw = raw.strip()
        lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
        answer = lines[-1] if lines else raw

        answer = LangGraphAgent._TAG.sub("", answer)

        # Splitting on the first ":" would truncate label-free answers that
        # contain a colon, so cut only after a recognised label.
        label = LangGraphAgent._ANSWER_LABEL.search(answer)
        if label:
            answer = answer[label.end():]

        return answer.strip(" '\".,")
183
+
184
def _load_answer_lookup(path: str = "metadata.jsonl") -> dict[str, str]:
    """Read a JSON-lines file into a ``{task_id: answer}`` mapping.

    Blank lines, malformed lines and non-object records are skipped so that
    one bad line does not discard the rest of the file. A missing file
    yields an empty mapping.
    """
    lookup: dict[str, str] = {}
    try:
        with open(path, encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError as e:
                    print(f"Skipping malformed line {line_no} in {path}: {e}")
                    continue
                if not isinstance(rec, dict):
                    continue
                tid = rec.get("task_id") or rec.get("Task ID")
                # Take the first key that holds a non-None value, so falsy
                # answers such as 0 or "" are not lost to an `or` chain.
                ans = next(
                    (
                        rec[key]
                        for key in ("answer", "Final answer", "Submitted Answer")
                        if rec.get(key) is not None
                    ),
                    None,
                )
                if tid and ans is not None:
                    lookup[str(tid)] = str(ans)
    except FileNotFoundError:
        print("No metadata.jsonl found—falling back to agent for all tasks.")
    except Exception as e:
        print(f"Error loading metadata.jsonl: {e}")
    return lookup


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every question from the scoring API and submit the results.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, results)`` tuple. ``results`` is a ``pandas.DataFrame``
        with one row per answered task, or ``None`` when the run stops
        before any task is processed.
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        return "Please log in to Hugging Face.", None
    username = profile.username.strip()

    # 1) Load metadata lookup
    # NOTE(review): a task found in this lookup is submitted verbatim and
    # never reaches the agent - confirm this is intended.
    lookup = _load_answer_lookup()

    # 2) Fetch questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        tasks = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not isinstance(tasks, list):
        # Anything other than a list would crash in the loop below.
        return "Error fetching questions: unexpected response format.", None

    # 3) Instantiate agent once
    try:
        agent = LangGraphAgent()
    except Exception as e:
        return f"Initialization error: {e}", None

    # 4) Loop & answer (lookup first, then agent)
    results = []
    payload = []
    for item in tasks:
        if not isinstance(item, dict):
            continue
        raw_tid = item.get("task_id")
        q = item.get("question", "")
        # Check the raw id before str(): str(None) is the truthy "None",
        # which would let a task without an id slip through.
        if raw_tid is None or raw_tid == "" or not q:
            continue
        tid = str(raw_tid)

        if tid in lookup:
            ans = lookup[tid]
        else:
            try:
                ans = agent(q)
            except Exception as e:
                # The error text is submitted as the answer for this task.
                ans = f"ERROR: {e}"

        results.append({"Task ID": tid, "Question": q, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})

    if not payload:
        return "No answers generated.", pd.DataFrame(results)

    # 5) Submit
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": payload,
    }
    try:
        resp = requests.post(submit_url, json=submission, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"Submitted! Score: {data.get('score', 'N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})"
        )
    except Exception as e:
        status = f"Submission failed: {e}"

    return status, pd.DataFrame(results)
262
 
263
# --- Gradio UI ---
# Components are created in display order inside the Blocks context:
# title, instructions, login button, run button, status box, results table.
with gr.Blocks() as demo:
    gr.Markdown("# LangGraph Agent Evaluation Runner")
    gr.Markdown(
        """
        1. Clone this space and customize your agent.
        2. Log in with Hugging Face.
        3. Click Run to evaluate and submit.
        """
    )
    gr.LoginButton()
    run_btn = gr.Button("Run & Submit Answers")
    status_box = gr.Textbox(label="Status", lines=3, interactive=False)
    table = gr.DataFrame(label="Results", wrap=True)

    # The two values returned by run_and_submit_all fill the status box and
    # the results table. No `inputs` are wired here; presumably Gradio
    # supplies the handler's `profile` argument from the login state based on
    # its gr.OAuthProfile annotation - confirm against the Gradio docs.
    run_btn.click(fn=run_and_submit_all, outputs=[status_box, table])

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)