import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Qwen Agent Definition ---
class BasicAgent:
    """Callable question-answering wrapper around a smolagents ``CodeAgent``.

    The agent is backed by the ``Qwen/Qwen2.5-72B-Instruct`` model and is given
    a DuckDuckGo search tool in addition to the library's base tools.
    """

    def __init__(self):
        """Create the model handle and the code agent that uses it."""
        print("Initializing Qwen CodeAgent...")
        self.model = HfApiModel(model_id="Qwen/Qwen2.5-72B-Instruct")
        search_tools = [DuckDuckGoSearchTool()]
        self.agent = CodeAgent(
            model=self.model,
            tools=search_tools,
            add_base_tools=True,
        )

    def __call__(self, question: str) -> str:
        """Answer *question* and return the result as whitespace-stripped text.

        Any exception raised while producing the answer is not propagated; it
        is reported in-band as an ``"Error: ..."`` string instead.
        """
        try:
            # The prefix nudges the agent toward a short, direct answer.
            prompt = f"Answer concisely and directly: {question}"
            answer_text = str(self.agent.run(prompt))
            return answer_text.strip()
        except Exception as exc:
            return "Error: " + str(exc)
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every fetched question and submit the answers.

    Args:
        profile: Profile of the logged-in Hugging Face user, or ``None`` when
            nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per attempted question, or ``None``
        when the run stopped before any question was attempted (no login,
        agent construction failure, or an unusable question list).
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"

    # SPACE_ID is only used to build the link to this Space's code below.
    space_id = os.getenv("SPACE_ID")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Any JSON value can come back; only a non-empty list of question
    # records is usable. Rejecting other shapes here avoids an uncaught
    # AttributeError on ``item.get`` further down.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None

    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            # Malformed entry: skip it like any other incomplete record.
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # Failed questions are shown in the table but not submitted.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
        else:
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
# --- Gradio UI ---
# Page layout: title, login button, run button, then the two output widgets.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Evaluator (Qwen2.5-72B-Instruct)")
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(
        interactive=False,
        lines=5,
        label="Run Status / Submission Result",
    )
    results_table = gr.DataFrame(wrap=True, label="Questions and Agent Answers")

    # No input components are wired; the handler's two return values fill
    # the status box and the results table, in that order.
    run_button.click(
        outputs=[status_output, results_table],
        fn=run_and_submit_all,
    )

if __name__ == "__main__":
    demo.launch()