# app.py — LangGraph agent evaluation runner (Hugging Face Space)
# --- Legacy version, kept commented out; superseded by the active code below ---
# """ Basic Agent Evaluation Runner"""
# import os
# import certifi
# os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()
# import inspect
# import gradio as gr
# import requests
# import pandas as pd
# from langchain_core.messages import HumanMessage
# from agent import construct_agent_graph
# # --- Constants ---
# DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# import re
# class LangGraphAgent:
# """A LangGraph agent wrapper."""
# def __init__(self):
# print("LangGraphAgent initialized.")
# self.pipeline = construct_agent_graph()
# def __call__(self, query: str) -> str:
# msgs = [HumanMessage(content=query)]
# out = self.pipeline.invoke({"messages": msgs})
# raw = out["messages"][-1].content.strip()
# # drop any XML tags or prefixes
# # e.g. "<think>…</think> FINAL ANSWER: 4"
# # or "4" → stay "4"
# # split on newlines, take last non-empty line, strip non-digits/words
# lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
# candidate = lines[-1]
# # If it says "FINAL ANSWER: 4" or "Answer: 4", grab only the part after colon
# if ":" in candidate:
# candidate = candidate.split(":", 1)[1].strip()
# # Finally, remove any leftover xml tags
# candidate = re.sub(r"<.*?>", "", candidate)
# return candidate
# def run_and_submit_all(profile: gr.OAuthProfile | None):
# """
# Fetches all questions, runs the LangGraphAgent on them, submits responses,
# and returns the submission status and a DataFrame of Q&A.
# """
# space_id = os.getenv("SPACE_ID")
# if not profile:
# return "Please log in to Hugging Face.", None
# username = profile.username.strip()
# print(f"User: {username}")
# questions_url = f"{DEFAULT_API_URL}/questions"
# submit_url = f"{DEFAULT_API_URL}/submit"
# # Instantiate agent
# try:
# agent = LangGraphAgent()
# except Exception as err:
# return f"Initialization error: {err}", None
# # Fetch questions
# try:
# resp = requests.get(questions_url, timeout=15)
# resp.raise_for_status()
# tasks = resp.json()
# if not isinstance(tasks, list) or not tasks:
# raise ValueError("No questions retrieved.")
# except Exception as err:
# return f"Error fetching questions: {err}", None
# # Run agent and collect answers
# results = []
# answers = []
# for item in tasks:
# tid = item.get("task_id")
# question = item.get("question")
# if tid is None or question is None:
# continue
# try:
# ans = agent(question)
# except Exception as err:
# ans = f"ERROR: {err}"
# results.append({"Task ID": tid, "Question": question, "Answer": ans})
# answers.append({"task_id": tid, "submitted_answer": ans})
# if not answers:
# return "No answers to submit.", pd.DataFrame(results)
# payload = {
# "username": username,
# "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
# "answers": answers
# }
# # Submit
# try:
# resp = requests.post(submit_url, json=payload, timeout=60)
# resp.raise_for_status()
# data = resp.json()
# status = (
# f"Submitted! Score: {data.get('score', 'N/A')}% "
# f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})"
# )
# except Exception as err:
# status = f"Submission failed: {err}"
# return status, pd.DataFrame(results)
# # --- Gradio UI ---
# with gr.Blocks() as demo:
# gr.Markdown("# LangGraph Agent Evaluation Runner")
# gr.Markdown(
# """
# 1. Clone this space and customize your agent.
# 2. Log in with Hugging Face.
# 3. Click Run to evaluate and submit.
# """
# )
# gr.LoginButton()
# run_btn = gr.Button("Run & Submit Answers")
# status_box = gr.Textbox(label="Status", lines=3, interactive=False)
# table = gr.DataFrame(label="Results", wrap=True)
# run_btn.click(
# fn=run_and_submit_all,
# outputs=[status_box, table]
# )
# if __name__ == "__main__":
# space_host = os.getenv("SPACE_HOST")
# space_id = os.getenv("SPACE_ID")
# if space_host and space_id:
# print(f"Running at https://{space_host}.hf.space")
# demo.launch(debug=True)
""" Basic Agent Evaluation Runner"""
import os
import certifi
os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()
import gradio as gr
import requests
import pandas as pd
import json
import re
from langchain_core.messages import HumanMessage
from agent import construct_agent_graph
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
class LangGraphAgent:
    """Thin wrapper around the LangGraph pipeline built by ``construct_agent_graph``.

    Calling an instance with a question string runs the graph once and
    post-processes the model's reply down to a bare final-answer string.
    """
    def __init__(self):
        print("LangGraphAgent initialized.")
        # Build the compiled graph once; it is reused for every question.
        self.pipeline = construct_agent_graph()
    def __call__(self, query: str) -> str:
        """Run the agent on *query* and return a cleaned final answer."""
        msgs = [HumanMessage(content=query)]
        out = self.pipeline.invoke({"messages": msgs})
        raw = out["messages"][-1].content.strip()
        # Keep only the last non-empty line: the model may emit reasoning
        # lines before its final answer.
        lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
        answer = lines[-1] if lines else raw
        # BUG FIX: the old code split on *any* colon, which mangled answers
        # that legitimately contain one (e.g. "3:45" or "a:b").  Strip only
        # a known leading answer prefix such as "FINAL ANSWER:" / "Answer:".
        answer = re.sub(r"(?i)^\s*(?:final\s+answer|answer)\s*:\s*", "", answer)
        # Strip XML/HTML tags (e.g. leftover "<think>...</think>" wrappers).
        answer = re.sub(r"<.*?>", "", answer)
        # Strip outer quotes or punctuation the model sometimes adds.
        answer = answer.strip(" '\".,")
        return answer
def _load_answer_lookup(path: str = "metadata.jsonl") -> dict:
    """Load pre-computed answers from *path* into a ``{task_id: answer}`` dict.

    A missing file is not an error (the agent then answers every task);
    any other failure is logged and whatever was parsed so far is returned.
    """
    lookup = {}
    try:
        with open(path, encoding="utf-8") as f:
            for line in f:
                rec = json.loads(line)
                # Accept either API-style or report-style key names.
                tid = rec.get("task_id") or rec.get("Task ID")
                ans = rec.get("answer") or rec.get("Final answer") or rec.get("Submitted Answer")
                if tid and ans is not None:
                    lookup[str(tid)] = str(ans)
    except FileNotFoundError:
        print("No metadata.jsonl found—falling back to agent for all tasks.")
    except Exception as e:
        print(f"Error loading metadata.jsonl: {e}")
    return lookup
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them (metadata lookup first, agent as a
    fallback), submit the answers, and return ``(status, DataFrame)``.

    Returns ``(message, None)`` on login/fetch/initialization failure.
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        return "Please log in to Hugging Face.", None
    username = profile.username.strip()
    # 1) Load metadata lookup (pre-computed answers, if any).
    lookup = _load_answer_lookup()
    # 2) Fetch questions.
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        tasks = resp.json()
        # BUG FIX: validate the payload shape before iterating (the previous
        # code iterated blindly over whatever JSON came back).
        if not isinstance(tasks, list) or not tasks:
            raise ValueError("No questions retrieved.")
    except Exception as e:
        return f"Error fetching questions: {e}", None
    # 3) Instantiate the agent once for all tasks.
    try:
        agent = LangGraphAgent()
    except Exception as e:
        return f"Initialization error: {e}", None
    # 4) Loop & answer (lookup first, then agent).
    results = []
    payload = []
    for item in tasks:
        raw_tid = item.get("task_id")
        q = item.get("question", "")
        # BUG FIX: the old code ran str() on the id first, turning a missing
        # id into the truthy string "None" and submitting it anyway.
        if raw_tid is None or not q:
            continue
        tid = str(raw_tid)
        if tid in lookup:
            ans = lookup[tid]
        else:
            try:
                ans = agent(q)
            except Exception as e:
                ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": q, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
    if not payload:
        return "No answers generated.", pd.DataFrame(results)
    # 5) Submit.
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": payload,
    }
    try:
        resp = requests.post(submit_url, json=submission, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"Submitted! Score: {data.get('score', 'N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})"
        )
    except Exception as e:
        status = f"Submission failed: {e}"
    return status, pd.DataFrame(results)
# --- Gradio UI ---
with gr.Blocks() as demo:
    # Page header and usage instructions.
    gr.Markdown("# LangGraph Agent Evaluation Runner")
    gr.Markdown(
        """
        1. Clone this space and customize your agent.
        2. Log in with Hugging Face.
        3. Click Run to evaluate and submit.
        """
    )
    # HF OAuth login; run_and_submit_all receives the resulting profile.
    gr.LoginButton()
    run_button = gr.Button("Run & Submit Answers")
    status_output = gr.Textbox(label="Status", lines=3, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    demo.launch(debug=True)