Shaukat39 committed on
Commit
74fa314
·
verified ·
1 Parent(s): 0e1911a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -76
app.py CHANGED
@@ -240,137 +240,212 @@
240
  # print("Launching Gradio Interface for Basic Agent Evaluation...")
241
  # demo.launch(debug=True, share=False)
242
 
 
 
 
243
  import os
 
244
  import gradio as gr
245
  import requests
246
  import pandas as pd
247
  from langchain_core.messages import HumanMessage
248
  from agent import build_graph
249
- import json
250
- import re
251
 
 
 
 
 
252
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
253
 
 
 
 
254
 
255
  class BasicAgent:
256
- """LangGraph agent with enhanced debug logging and answer sanitization."""
257
  def __init__(self):
258
- print("🧠 Initializing BasicAgent...")
259
  self.graph = build_graph()
260
 
261
  def __call__(self, question: str) -> str:
262
- print(f"\n📥 Received question: {repr(question)}")
 
263
  messages = [HumanMessage(content=question)]
264
- response = self.graph.invoke({"messages": messages})
265
-
266
- if not response or "messages" not in response:
267
- print("❌ No response or missing 'messages' key from graph.")
268
- return "AGENT ERROR: No valid response from graph."
269
-
270
- print("📦 Full graph response:", repr(response))
271
 
272
- raw_output = response["messages"][-1].content
273
- print("📦 Raw LLM output:", repr(raw_output))
274
 
275
- match = re.search(r"FINAL ANSWER:\s*(.+)", raw_output, re.IGNORECASE)
276
- if match:
277
- final_answer = match.group(1).strip()
278
- else:
279
- final_answer = raw_output.strip()
280
- print("⚠️ 'FINAL ANSWER:' prefix missing. Using fallback.")
281
-
282
- # Sanitize for newline bugs
283
- clean_answer = final_answer.replace("\n", " ").replace("\r", " ").strip()
284
- print("✅ Final cleaned answer:", repr(clean_answer))
285
- return clean_answer
286
-
287
-
288
- def run_and_submit_all(profile: gr.OAuthProfile | None):
289
- space_id = os.getenv("SPACE_ID", "").strip()
290
 
291
  if profile:
292
- username = f"{profile.username.strip()}"
293
- print(f"🙋 User logged in: {username}")
294
  else:
295
- return "⚠️ Please log in to Hugging Face first.", None
 
296
 
297
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main".strip()
298
- print("🛰️ Submitting from agent_code →", repr(agent_code))
299
-
300
- questions_url = f"{DEFAULT_API_URL}/questions"
301
- submit_url = f"{DEFAULT_API_URL}/submit"
302
 
 
303
  try:
304
  agent = BasicAgent()
305
  except Exception as e:
306
- return f"🚫 Failed to initialize agent: {e}", None
307
-
308
- print(f"📡 Fetching questions from {questions_url}")
 
 
 
 
 
309
  try:
310
  response = requests.get(questions_url, timeout=15)
311
  response.raise_for_status()
312
  questions_data = response.json()
 
 
 
 
 
 
 
 
 
 
 
313
  except Exception as e:
314
- return f" Failed to fetch questions: {e}", None
 
315
 
 
316
  results_log = []
317
  answers_payload = []
318
- print(f"🤖 Running agent on {len(questions_data)} questions...")
319
-
320
  for item in questions_data:
321
  task_id = item.get("task_id")
322
  question_text = item.get("question")
323
- if not task_id or not question_text:
 
324
  continue
325
  try:
326
- answer = agent(question_text)
327
- # Check for newlines explicitly
328
- if "\n" in answer:
329
- print(f"⚠️ Found newline in answer [{task_id}] →", repr(answer))
330
- answers_payload.append({"task_id": task_id.strip(), "submitted_answer": answer})
331
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
332
  except Exception as e:
333
- err_msg = f"AGENT ERROR: {e}"
334
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": err_msg})
335
 
336
  if not answers_payload:
337
- return "🚫 Agent produced no valid answers.", pd.DataFrame(results_log)
338
-
339
- submission_data = {
340
- "username": username,
341
- "agent_code": agent_code,
342
- "answers": answers_payload
343
- }
344
 
345
- print("\n📤 SUBMISSION PAYLOAD:")
346
- print(json.dumps(submission_data, indent=2))
 
 
347
 
 
 
348
  try:
349
  response = requests.post(submit_url, json=submission_data, timeout=60)
350
  response.raise_for_status()
351
- result = response.json()
352
- summary = (
353
- f"Submission Successful!\n"
354
- f"User: {result.get('username')}\n"
355
- f"Score: {result.get('score', 'N/A')}% "
356
- f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
357
- f"Message: {result.get('message', 'No message returned.')}"
358
  )
359
- return summary, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  except Exception as e:
361
- return f"🚨 Submission failed: {e}", pd.DataFrame(results_log)
 
 
 
362
 
363
 
 
364
  with gr.Blocks() as demo:
365
- gr.Markdown("## 🧪 GAIA Evaluation Runner with Debug Mode")
366
- gr.Markdown("Log in, run your agent, submit answers, and review results with logging enabled.")
 
 
 
 
 
 
 
 
 
 
 
 
367
  gr.LoginButton()
 
368
  run_button = gr.Button("Run Evaluation & Submit All Answers")
369
- status_output = gr.Textbox(label="Status / Result", lines=4)
370
- results_table = gr.DataFrame(label="QA Log")
371
 
372
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
 
 
 
 
373
 
374
  if __name__ == "__main__":
375
- print("🚀 Launching Gradio app...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  demo.launch(debug=True, share=False)
 
240
  # print("Launching Gradio Interface for Basic Agent Evaluation...")
241
  # demo.launch(debug=True, share=False)
242
 
243
+ #
244
+
245
+ """ Basic Agent Evaluation Runner"""
246
  import os
247
+ import inspect
248
  import gradio as gr
249
  import requests
250
  import pandas as pd
251
  from langchain_core.messages import HumanMessage
252
  from agent import build_graph
 
 
253
 
254
+
255
+
256
+ # (Keep Constants as is)
257
+ # --- Constants ---
258
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
259
 
260
# --- Basic Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------


class BasicAgent:
    """A LangGraph agent that answers one question per call.

    The graph is obtained from ``build_graph()`` once, at construction time.
    Calling the instance sends the question through the graph and returns
    the text that follows the ``FINAL ANSWER:`` marker in the last message.
    """

    def __init__(self):
        print("BasicAgent initialized.")
        self.graph = build_graph()

    @staticmethod
    def _extract_final_answer(raw_output: object) -> str:
        """Return the answer text that follows the ``FINAL ANSWER:`` marker.

        The previous implementation returned ``raw_output[14:]``, i.e. it
        dropped exactly ``len("FINAL ANSWER: ")`` characters no matter what
        the reply looked like, which truncated every reply that did not start
        with that exact prefix.

        Args:
            raw_output: Content of the last graph message. Non-string content
                is converted with ``str()`` before searching.
                NOTE(review): confirm whether the graph can return
                non-string content at all.

        Returns:
            The text after the last marker (matched case-insensitively) with
            surrounding whitespace removed, or the whole stripped text when
            no marker is present.
        """
        import re  # local import: the module-level import list does not provide it

        text = raw_output if isinstance(raw_output, str) else str(raw_output)
        last_marker = None
        for last_marker in re.finditer(r"FINAL ANSWER:\s*", text, flags=re.IGNORECASE):
            pass  # keep only the final occurrence; earlier ones may be reasoning
        if last_marker is None:
            return text.strip()
        return text[last_marker.end():].strip()

    def __call__(self, question: str) -> str:
        """Run the graph on ``question`` and return the extracted answer."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # Wrap the question in a HumanMessage from langchain_core
        state = self.graph.invoke({"messages": [HumanMessage(content=question)]})
        raw_output = state["messages"][-1].content
        return self._extract_final_answer(raw_output)
 
 
 
 
277
 
 
 
278
 
279
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per processed question, or None
        when the run stops before any question is processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    # SPACE_ID is used to build the link to the code; it is None when the
    # variable is not set (e.g. when running outside a Space).
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # In the case of an app running as a Hugging Face space, this link points
    # toward your codebase (useful for others so please keep it public)
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: requests' JSONDecodeError is a
        # subclass of it, so the broader handler would otherwise shadow this
        # one and the response-text diagnostic would never be printed.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failing question is logged for display but not submitted, so
            # one bad task does not abort the whole run.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        # Prefer the server's JSON "detail" field; fall back to raw text when
        # the error body is not JSON.
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
398
 
399
 
400
# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are wired here.
    # NOTE(review): presumably Gradio supplies the OAuth profile from the
    # handler's type hint - confirm against the Gradio OAuth docs.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST is documented as the Space's full host name, so only
        # append the domain suffix when it is actually missing; otherwise the
        # printed URL would end in ".hf.space.hf.space".
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)