Spaces:
Sleeping
Sleeping
| # """ Basic Agent Evaluation Runner""" | |
| # import os | |
| # import certifi | |
| # os.environ['REQUESTS_CA_BUNDLE'] = certifi.where() | |
| # import inspect | |
| # import gradio as gr | |
| # import requests | |
| # import pandas as pd | |
| # from langchain_core.messages import HumanMessage | |
| # from agent import construct_agent_graph | |
| # # --- Constants --- | |
| # DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" | |
| # import re | |
| # class LangGraphAgent: | |
| # """A LangGraph agent wrapper.""" | |
| # def __init__(self): | |
| # print("LangGraphAgent initialized.") | |
| # self.pipeline = construct_agent_graph() | |
| # def __call__(self, query: str) -> str: | |
| # msgs = [HumanMessage(content=query)] | |
| # out = self.pipeline.invoke({"messages": msgs}) | |
| # raw = out["messages"][-1].content.strip() | |
| # # drop any XML tags or prefixes | |
| # # e.g. "<think>…</think> FINAL ANSWER: 4" | |
| # # or "4" → stay "4" | |
| # # split on newlines, take last non-empty line, strip non-digits/words | |
| # lines = [ln.strip() for ln in raw.splitlines() if ln.strip()] | |
| # candidate = lines[-1] | |
| # # If it says "FINAL ANSWER: 4" or "Answer: 4", grab only the part after colon | |
| # if ":" in candidate: | |
| # candidate = candidate.split(":", 1)[1].strip() | |
| # # Finally, remove any leftover xml tags | |
| # candidate = re.sub(r"<.*?>", "", candidate) | |
| # return candidate | |
| # def run_and_submit_all(profile: gr.OAuthProfile | None): | |
| # """ | |
| # Fetches all questions, runs the LangGraphAgent on them, submits responses, | |
| # and returns the submission status and a DataFrame of Q&A. | |
| # """ | |
| # space_id = os.getenv("SPACE_ID") | |
| # if not profile: | |
| # return "Please log in to Hugging Face.", None | |
| # username = profile.username.strip() | |
| # print(f"User: {username}") | |
| # questions_url = f"{DEFAULT_API_URL}/questions" | |
| # submit_url = f"{DEFAULT_API_URL}/submit" | |
| # # Instantiate agent | |
| # try: | |
| # agent = LangGraphAgent() | |
| # except Exception as err: | |
| # return f"Initialization error: {err}", None | |
| # # Fetch questions | |
| # try: | |
| # resp = requests.get(questions_url, timeout=15) | |
| # resp.raise_for_status() | |
| # tasks = resp.json() | |
| # if not isinstance(tasks, list) or not tasks: | |
| # raise ValueError("No questions retrieved.") | |
| # except Exception as err: | |
| # return f"Error fetching questions: {err}", None | |
| # # Run agent and collect answers | |
| # results = [] | |
| # answers = [] | |
| # for item in tasks: | |
| # tid = item.get("task_id") | |
| # question = item.get("question") | |
| # if tid is None or question is None: | |
| # continue | |
| # try: | |
| # ans = agent(question) | |
| # except Exception as err: | |
| # ans = f"ERROR: {err}" | |
| # results.append({"Task ID": tid, "Question": question, "Answer": ans}) | |
| # answers.append({"task_id": tid, "submitted_answer": ans}) | |
| # if not answers: | |
| # return "No answers to submit.", pd.DataFrame(results) | |
| # payload = { | |
| # "username": username, | |
| # "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main", | |
| # "answers": answers | |
| # } | |
| # # Submit | |
| # try: | |
| # resp = requests.post(submit_url, json=payload, timeout=60) | |
| # resp.raise_for_status() | |
| # data = resp.json() | |
| # status = ( | |
| # f"Submitted! Score: {data.get('score', 'N/A')}% " | |
| # f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})" | |
| # ) | |
| # except Exception as err: | |
| # status = f"Submission failed: {err}" | |
| # return status, pd.DataFrame(results) | |
| # # --- Gradio UI --- | |
| # with gr.Blocks() as demo: | |
| # gr.Markdown("# LangGraph Agent Evaluation Runner") | |
| # gr.Markdown( | |
| # """ | |
| # 1. Clone this space and customize your agent. | |
| # 2. Log in with Hugging Face. | |
| # 3. Click Run to evaluate and submit. | |
| # """ | |
| # ) | |
| # gr.LoginButton() | |
| # run_btn = gr.Button("Run & Submit Answers") | |
| # status_box = gr.Textbox(label="Status", lines=3, interactive=False) | |
| # table = gr.DataFrame(label="Results", wrap=True) | |
| # run_btn.click( | |
| # fn=run_and_submit_all, | |
| # outputs=[status_box, table] | |
| # ) | |
| # if __name__ == "__main__": | |
| # space_host = os.getenv("SPACE_HOST") | |
| # space_id = os.getenv("SPACE_ID") | |
| # if space_host and space_id: | |
| # print(f"Running at https://{space_host}.hf.space") | |
| # demo.launch(debug=True) | |
| """ Basic Agent Evaluation Runner""" | |
| import os | |
| import certifi | |
| os.environ['REQUESTS_CA_BUNDLE'] = certifi.where() | |
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| import json | |
| import re | |
| from langchain_core.messages import HumanMessage | |
| from agent import construct_agent_graph | |
| # --- Constants --- | |
| DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" | |
class LangGraphAgent:
    """Callable wrapper around the LangGraph pipeline.

    The graph is built once in ``__init__`` via ``construct_agent_graph()``.
    Calling the instance sends a single question through the graph and
    reduces the content of the last returned message to a bare answer string.
    """

    # Answer label the model may put in front of the result,
    # e.g. "FINAL ANSWER:" or "Answer:" (case-insensitive).
    _ANSWER_LABEL = re.compile(r"\b(?:final\s+)?answer\s*:", re.IGNORECASE)
    # A single XML/HTML-style tag such as "<think>" or "</b>".
    _TAG = re.compile(r"<.*?>")

    def __init__(self):
        print("LangGraphAgent initialized.")
        self.pipeline = construct_agent_graph()

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten message content into one string.

        A plain string is returned unchanged. For a list, string items and
        the ``"text"`` value of dict items are concatenated; items without a
        string ``"text"`` entry are ignored.
        """
        if isinstance(content, str):
            return content
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                parts.append(block["text"])
        return "".join(parts)

    @classmethod
    def _extract_answer(cls, raw: str) -> str:
        """Reduce raw model output to the bare answer.

        Steps, in order:
          1. keep only the last non-empty line;
          2. remove XML/HTML-style tags (the text between tags is kept);
          3. if the line contains an answer label ("FINAL ANSWER:" or
             "Answer:"), keep only what follows the last such label;
          4. strip surrounding spaces, quotes, periods and commas.

        Tags are removed before the label is looked up so that a colon inside
        e.g. ``<think>...</think>`` cannot be mistaken for the label, and only
        a recognised label is cut off so that answers which themselves contain
        a colon ("10:30", "https://...") survive intact.
        """
        lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
        answer = lines[-1] if lines else raw.strip()
        answer = cls._TAG.sub("", answer)
        labels = list(cls._ANSWER_LABEL.finditer(answer))
        if labels:
            answer = answer[labels[-1].end():]
        return answer.strip(" '\".,")

    def __call__(self, query: str) -> str:
        """Run ``query`` through the pipeline and return the cleaned answer."""
        msgs = [HumanMessage(content=query)]
        out = self.pipeline.invoke({"messages": msgs})
        raw = self._content_to_text(out["messages"][-1].content)
        return self._extract_answer(raw)
def _first_present(record, keys):
    """Return the first value stored under one of ``keys`` that is neither None nor ""."""
    for key in keys:
        value = record.get(key)
        # Compare against None / "" explicitly so that falsy but real
        # answers such as 0 or False are not skipped.
        if value is not None and value != "":
            return value
    return None


def _load_answer_lookup():
    """Build a ``{task_id: answer}`` dict from ``metadata.jsonl``.

    Loading is best-effort: a missing file, an unreadable file, or individual
    blank / malformed lines are reported with ``print`` and never raise.
    Whatever could be parsed is returned.
    """
    lookup = {}
    try:
        with open("metadata.jsonl", encoding="utf-8") as f:
            for lineno, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue  # blank lines are not JSON; ignore them
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError as e:
                    # One bad record must not discard the rest of the file.
                    print(f"Skipping malformed line {lineno} in metadata.jsonl: {e}")
                    continue
                if not isinstance(rec, dict):
                    continue
                tid = rec.get("task_id") or rec.get("Task ID")
                ans = _first_present(
                    rec, ("answer", "Final answer", "Submitted Answer")
                )
                if tid and ans is not None:
                    lookup[str(tid)] = str(ans)
    except FileNotFoundError:
        print("No metadata.jsonl found—falling back to agent for all tasks.")
    except Exception as e:
        print(f"Error loading metadata.jsonl: {e}")
    return lookup


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every question from the scoring API and submit the results.

    Answers found in ``metadata.jsonl`` are used as-is; every other question
    is passed to a ``LangGraphAgent``. An agent failure on a single question
    is recorded as an ``"ERROR: ..."`` answer instead of aborting the run.

    Args:
        profile: The logged-in Hugging Face user, or ``None`` when nobody is
            logged in. Per the Gradio OAuth docs this is filled in from the
            parameter's type annotation, so the annotation must stay.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per answered task,
        or ``None`` when the run stopped before any question was processed.
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        return "Please log in to Hugging Face.", None
    username = profile.username.strip()

    # 1) Load metadata lookup
    lookup = _load_answer_lookup()

    # 2) Fetch questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        tasks = resp.json()
        # Anything but a list would break the loop below with an
        # unhandled error, so reject it here with a readable message.
        if not isinstance(tasks, list):
            raise ValueError("unexpected response format (expected a list)")
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # 3) Instantiate agent once
    try:
        agent = LangGraphAgent()
    except Exception as e:
        return f"Initialization error: {e}", None

    # 4) Loop & answer (lookup first, then agent)
    results = []
    payload = []
    for item in tasks:
        if not isinstance(item, dict):
            continue
        raw_tid = item.get("task_id")
        # Convert only a present id: str(None) would yield the truthy
        # string "None" and let id-less tasks slip through.
        tid = "" if raw_tid is None else str(raw_tid)
        q = item.get("question", "")
        if not tid or not q:
            continue
        if tid in lookup:
            ans = lookup[tid]
        else:
            try:
                ans = agent(q)
            except Exception as e:
                ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": q, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})

    if not payload:
        return "No answers generated.", pd.DataFrame(results)

    # 5) Submit
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": payload,
    }
    try:
        resp = requests.post(submit_url, json=submission, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"Submitted! Score: {data.get('score', 'N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})"
        )
    except Exception as e:
        status = f"Submission failed: {e}"
    return status, pd.DataFrame(results)
# --- Gradio UI ---
# Page layout: a title, short usage instructions, the Hugging Face login
# button, and one button that runs the evaluation and fills in the status
# text box and the results table.
with gr.Blocks() as demo:
    gr.Markdown("# LangGraph Agent Evaluation Runner")
    gr.Markdown(
        """
1. Clone this space and customize your agent.
2. Log in with Hugging Face.
3. Click Run to evaluate and submit.
"""
    )
    gr.LoginButton()
    run_btn = gr.Button("Run & Submit Answers")
    status_box = gr.Textbox(label="Status", lines=3, interactive=False)
    table = gr.DataFrame(label="Results", wrap=True)
    # No explicit inputs are wired; the two return values of the callback
    # go to the status box and the table, in that order.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_box, table],
    )


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)