# (Removed HF Spaces page-status banner "Spaces: / Sleeping / Sleeping" —
# extraction artifact, not part of the application source.)
| import os | |
| import re | |
| import math | |
| import json | |
| import requests | |
| import gradio as gr | |
| import pandas as pd | |
| from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage | |
| from langchain_core.tools import tool | |
| from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint | |
| from langgraph.graph import StateGraph, END | |
| from langgraph.graph.message import add_messages | |
| from typing import TypedDict, Annotated | |
# --- Constants ---
# Base URL of the GAIA scoring service (questions, task files, answer submission).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Hugging Face repo id of the chat model served through the Inference API.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
# ── Agent state ───────────────────────────────────────────────────────────────
class AgentState(TypedDict):
    """LangGraph state: the conversation history threaded through the graph."""
    # add_messages makes LangGraph append node outputs instead of overwriting.
    messages: Annotated[list, add_messages]
# ── Tools ─────────────────────────────────────────────────────────────────────
def web_search(query: str) -> str:
    """Search the web for current facts, people, events, or definitions."""
    try:
        from ddgs import DDGS
        with DDGS() as ddgs:
            hits = list(ddgs.text(query, max_results=5))
        if not hits:
            return "No results found."
        # One "Title / URL / Snippet" paragraph per result.
        formatted = []
        for hit in hits:
            formatted.append(
                f"Title: {hit['title']}\nURL: {hit['href']}\nSnippet: {hit['body']}"
            )
        return "\n\n".join(formatted)
    except Exception as e:
        return f"Search error: {e}"
def fetch_url(url: str) -> str:
    """Fetch and return the text content of a web page."""
    try:
        import urllib.request
        request = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(request, timeout=10) as response:
            html = response.read().decode("utf-8", errors="ignore")
        # Crude plain-texting: drop tags, then collapse runs of whitespace.
        no_tags = re.sub(r"<[^>]+>", " ", html)
        collapsed = re.sub(r"\s{2,}", " ", no_tags).strip()
        return collapsed[:6000]
    except Exception as e:
        return f"Fetch error: {e}"
def python_repl(code: str) -> str:
    """Execute Python code. Use for arithmetic, date math, string processing, list work.

    Behaves like a REPL: stdout is captured, and if the final line is a bare
    expression its value is appended to the output.  Returns "(no output)"
    when nothing was printed or produced, and a full traceback string when
    the code raises.
    """
    import io, contextlib, traceback
    buf = io.StringIO()
    local_ns: dict = {}
    stripped = code.strip()
    lines = stripped.splitlines()
    try:
        # If the last line parses as an expression, detach it so it is
        # evaluated exactly once.  (The previous version exec'd the whole
        # snippet and then re-eval'd the last line, running side-effecting
        # tails such as `items.pop()` or a trailing `print(...)` twice.)
        tail = None
        body = stripped
        if lines:
            try:
                tail = compile(lines[-1].strip(), "<string>", "eval")
                body = "\n".join(lines[:-1])
            except SyntaxError:
                tail = None  # last line is a statement; exec everything
        try:
            body_code = compile(body, "<string>", "exec")
        except SyntaxError:
            if tail is None:
                raise
            # Last line was a continuation of a multi-line construct, not a
            # standalone expression: run the snippet whole instead.
            tail = None
            body_code = compile(stripped, "<string>", "exec")
        result = None
        with contextlib.redirect_stdout(buf):
            exec(body_code, local_ns)
            if tail is not None:
                result = eval(tail, local_ns)
        output = buf.getvalue()
        if result is not None:
            output += str(result)
        return output.strip() if output.strip() else "(no output)"
    except Exception:
        return traceback.format_exc()
def read_task_file(task_id: str) -> str:
    """Download the file attached to a GAIA task and return its text content."""
    url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        ctype = response.headers.get("content-type", "")
        payload = response.content
        # PDFs: extract page text when pdfplumber is available.
        if "pdf" in ctype or str(task_id).endswith(".pdf"):
            try:
                import pdfplumber, io
                with pdfplumber.open(io.BytesIO(payload)) as pdf:
                    pages = (page.extract_text() or "" for page in pdf.pages)
                    return "\n".join(pages)[:8000]
            except Exception:
                pass  # fall through to the generic text decode below
        if "image" in ctype:
            return f"[Image file received ({len(payload)} bytes). Analyse based on question context.]"
        # Default: best-effort UTF-8 text, truncated for prompt budget.
        return payload.decode("utf-8", errors="ignore")[:8000]
    except Exception as e:
        return f"File fetch error: {e}"
def calculator(expression: str) -> str:
    """Evaluate a mathematical expression. Supports +,-,*,/,**,sqrt,log, etc.

    The expression sees every public ``math`` function/constant plus a small
    whitelist of numeric builtins; ``__builtins__`` is otherwise blanked so
    arbitrary code (imports, attribute tricks) cannot run.  Note the input
    comes from the LLM, so eval here is deliberate best-effort sandboxing,
    not a hard security boundary.

    Returns the result as a string, or "Calc error: ..." on failure.
    """
    try:
        safe_globals = {k: getattr(math, k) for k in dir(math) if not k.startswith("_")}
        # Blank out builtins first, then re-expose safe numeric helpers.
        # (`pow` is intentionally omitted so math.pow keeps float semantics.)
        safe_globals["__builtins__"] = {}
        safe_globals.update({"abs": abs, "round": round, "min": min, "max": max, "sum": sum})
        return str(eval(expression, safe_globals))
    except Exception as e:
        return f"Calc error: {e}"
# Registry of callables exposed to the LLM.  bind_tools() accepts plain
# functions, but plain functions carry __name__ rather than .name — the old
# `t.name` lookup raised AttributeError at import because these tools are
# undecorated.  Key the map on whichever attribute exists.
TOOLS = [web_search, fetch_url, python_repl, read_task_file, calculator]
TOOL_MAP = {getattr(t, "name", None) or t.__name__: t for t in TOOLS}
| SYSTEM_PROMPT = """You are a precise research assistant solving GAIA benchmark questions. | |
| RULES: | |
| 1. Use your tools to look up facts β never guess or hallucinate. | |
| 2. Break multi-step questions into parts and solve each with tools. | |
| 3. When you have the final answer, respond with ONLY the answer β no explanation, no preamble. | |
| 4. Answers must be exact: correct spelling, correct format (numbers, lists, names). | |
| 5. For lists, separate items with commas. | |
| 6. For numbers, give only the number (with units only if the question asks). | |
| 7. If the question mentions an attached file, call read_task_file with the task_id.""" | |
# ── LangGraph nodes ───────────────────────────────────────────────────────────
def agent_node(state: AgentState, llm_with_tools):
    """LLM step: prepend the system prompt and ask the model for its next move."""
    history = state["messages"]
    reply = llm_with_tools.invoke([SystemMessage(content=SYSTEM_PROMPT)] + history)
    return {"messages": [reply]}
def tool_node(state: AgentState):
    """Execute every tool call requested by the last AI message.

    Returns the ToolMessages to append to the conversation.  Handles both
    LangChain tool objects (which expose .invoke) and plain functions — the
    old unconditional `fn.invoke(...)` crashed on undecorated functions.
    Tool failures are reported back to the model instead of raised.
    """
    last = state["messages"][-1]
    results = []
    for call in last.tool_calls:
        fn = TOOL_MAP.get(call["name"])
        try:
            if fn is None:
                output = f"Unknown tool: {call['name']}"
            elif hasattr(fn, "invoke"):
                output = fn.invoke(call["args"])
            else:
                # Plain (undecorated) function: spread the args dict.
                output = fn(**call["args"])
        except Exception as e:
            output = f"Tool error: {e}"
        results.append(ToolMessage(content=str(output), tool_call_id=call["id"]))
    return {"messages": results}
def should_continue(state: AgentState) -> str:
    """Route to the tool node while the last message requests tool calls."""
    tail = state["messages"][-1]
    return "tools" if getattr(tail, "tool_calls", None) else END
# ── BasicAgent — drop-in replacement for the template ────────────────────────
class BasicAgent:
    """
    LangGraph ReAct agent powered by MODEL_ID via the HuggingFace Inference API.
    Implements __call__(question) -> answer so it fits the template unchanged.
    """
    def __init__(self):
        # HF_TOKEN must be provided as a Space secret / environment variable.
        hf_token = os.environ.get("HF_TOKEN", "")
        endpoint = HuggingFaceEndpoint(
            repo_id=MODEL_ID,
            task="text-generation",
            max_new_tokens=1024,
            temperature=0.1,  # near-deterministic: GAIA wants exact answers
            huggingfacehub_api_token=hf_token,
        )
        llm = ChatHuggingFace(llm=endpoint)
        llm_with_tools = llm.bind_tools(TOOLS)
        # ReAct loop: agent -> (tools -> agent)* -> END
        graph = StateGraph(AgentState)
        graph.add_node("agent", lambda s: agent_node(s, llm_with_tools))
        graph.add_node("tools", tool_node)
        graph.set_entry_point("agent")
        graph.add_conditional_edges("agent", should_continue, {"tools": "tools", END: END})
        graph.add_edge("tools", "agent")
        self.app = graph.compile()
        # Report the actual model in use (the old message hard-coded
        # "Qwen2.5-72B", which did not match MODEL_ID).
        print(f"BasicAgent (LangGraph + {MODEL_ID}) initialized.")

    def __call__(self, question: str, task_id: str = "") -> str:
        """Run the graph on one question and return the final answer text."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        user_content = question
        if task_id:
            # Surface the task id so the model knows to call read_task_file.
            user_content += f"\n\n[Task ID for attached file: {task_id}]"
        try:
            final_state = self.app.invoke(
                {"messages": [HumanMessage(content=user_content)]},
                config={"recursion_limit": 25},  # caps agent/tool round-trips
            )
            answer = final_state["messages"][-1].content.strip()
        except Exception as e:
            answer = f"ERROR: {e}"
        print(f"Agent returning answer: {answer[:80]}")
        return answer
# ── run_and_submit_all — identical behavior to template, decomposed ──────────
def _resolve_profile(profile):
    """Return (profile, username), falling back to a fixed user when logged out."""
    if profile:
        username = profile.username
        print(f"User logged in: {username}")
        return profile, username
    # NOTE(review): hard-coded fallback username — submissions made while
    # logged out are credited to this account.  Confirm this is intended.
    from types import SimpleNamespace
    return SimpleNamespace(username="Hemil4"), "Hemil4"

def _fetch_questions(questions_url):
    """Fetch the question list; returns (questions, error) — exactly one is None."""
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return None, "Fetched questions list is empty or invalid format."
        print(f"Fetched {len(questions_data)} questions.")
        return questions_data, None
    except Exception as e:
        return None, f"Error fetching questions: {e}"

def _run_agent(agent, questions_data):
    """Run the agent over every question; returns (answers_payload, results_log)."""
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # One failing task must not abort the whole run.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
    return answers_payload, results_log

def _submit(submit_url, submission_data, results_log):
    """POST answers to the scoring endpoint; returns (status_message, DataFrame)."""
    print(f"Submitting {len(submission_data['answers'])} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except Exception:
            error_detail += f" Response: {e.response.text[:500]}"
        return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)

def run_and_submit_all(profile=None):
    """Instantiate the agent, fetch all questions, answer them, submit, report.

    Returns (status_message, results DataFrame-or-None) for the Gradio outputs.
    """
    space_id = os.getenv("SPACE_ID")
    profile, username = _resolve_profile(profile)
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    # 1. Instantiate agent
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)
    # 2. Fetch questions
    questions_data, fetch_error = _fetch_questions(questions_url)
    if fetch_error:
        return fetch_error, None
    # 3. Run agent
    answers_payload, results_log = _run_agent(agent, questions_data)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
    # 4. Submit
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    return _submit(submit_url, submission_data, results_log)
# ── Gradio UI — identical to template ─────────────────────────────────────────
# Layout: instructions, HF login, one run button, status textbox, results table.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
**Instructions:**
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
---
**Disclaimers:**
Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
"""
    )
    # NOTE(review): run_and_submit_all's `profile` parameter is not annotated
    # with gr.OAuthProfile, so Gradio may never inject the logged-in profile
    # and the fallback username path would always be taken — confirm.
    gr.LoginButton()
    # Single action: fetch questions, run the agent, submit, display results.
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
| if __name__ == "__main__": | |
| print("\n" + "-"*30 + " App Starting " + "-"*30) | |
| space_host_startup = os.getenv("SPACE_HOST") | |
| space_id_startup = os.getenv("SPACE_ID") | |
| if space_host_startup: | |
| print(f"β SPACE_HOST found: {space_host_startup}") | |
| else: | |
| print("βΉοΈ SPACE_HOST not found (running locally?).") | |
| if space_id_startup: | |
| print(f"β SPACE_ID found: {space_id_startup}") | |
| print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main") | |
| else: | |
| print("βΉοΈ SPACE_ID not found (running locally?).") | |
| print("-"*60 + "\n") | |
| demo.launch(debug=True, share=False) |