sumangempire committed on
Commit
427ab63
·
verified ·
1 Parent(s): 991aeac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -90
app.py CHANGED
@@ -2,122 +2,100 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool
6
 
7
- # --- Constants ---
8
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
9
 
10
class GenuineCourseAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` with a web search tool.

    Calling an instance with a question string returns the agent's answer as
    a stripped string, or a fixed error string if the agent raises.
    """

    def __init__(self, hf_token):
        """Build the model and the agent.

        Args:
            hf_token: Value forwarded to ``HfApiModel`` as ``token``.
        """
        print("Initializing genuine smolagents CodeAgent...")

        # Model backing the agent; the token is forwarded for authentication.
        self.model = HfApiModel(
            model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
            token=hf_token,
        )

        # The agent gets a DuckDuckGo search tool plus the library's base
        # tools, and is capped at five steps per run.
        search_tools = [DuckDuckGoSearchTool()]
        self.agent = CodeAgent(
            tools=search_tools,
            model=self.model,
            add_base_tools=True,
            max_steps=5,
        )

    def __call__(self, question: str) -> str:
        """Answer *question* and return the stripped answer text.

        The question is wrapped in instructions asking for a bare final
        answer (no explanations or full sentences). Any exception raised by
        the agent is printed and replaced by ``"Error during execution"``.
        """
        instructions = (
            "You are an expert AI answering questions for the GAIA benchmark.\n"
            "Use the DuckDuckGo search tool to find factual information if needed.\n"
            "CRITICAL INSTRUCTION: Your final answer MUST be ONLY the exact value or string requested. "
            "Do NOT include any explanations, full sentences, or conversational text. "
            "If the answer is a list, separate items with a comma.\n\n"
        )
        prompt = instructions + f"Question: {question}"

        try:
            raw_answer = self.agent.run(prompt)
        except Exception as e:
            print(f"Agent encountered an error: {e}")
            return "Error during execution"
        return str(raw_answer).strip()
46
 
47
def run_and_submit_all(profile: gr.OAuthProfile | None, token: gr.OAuthToken | None):
    """Fetch the questions, answer each with the agent, and submit the results.

    Args:
        profile: OAuth profile of the signed-in user, or ``None``.
        token: OAuth token of the signed-in user, or ``None``.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is ``None`` when the
        run stops before any question is processed, otherwise a DataFrame
        with one row per answered question.
    """
    if not (profile and token):
        return "🚨 ERROR: Please click 'Sign in with Hugging Face' before running.", None

    space_id = os.getenv("SPACE_ID", "local")
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # Build the agent with the signed-in user's token.
    try:
        agent = GenuineCourseAgent(hf_token=token.token)
    except Exception as e:
        return f"Failed to initialize agent: {e}", None

    try:
        fetched = requests.get(questions_url, timeout=15)
        fetched.raise_for_status()
        questions_data = fetched.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    rows = []
    answers = []

    print(f"Running agent on {len(questions_data)} questions. This will take time as the agent actively searches the web...")

    for entry in questions_data:
        task_id, question_text = entry.get("task_id"), entry.get("question")
        # Only entries carrying both an id and a question are answered.
        if task_id and question_text:
            submitted_answer = agent(question_text)
            answers.append({"task_id": task_id, "submitted_answer": submitted_answer})
            rows.append({"Task ID": task_id, "Question": question_text[:60] + "...", "Answer": submitted_answer})

    submission_data = {
        "username": profile.username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers,
    }

    try:
        reply = requests.post(submit_url, json=submission_data, timeout=120)
        reply.raise_for_status()
        score = reply.json().get('score', 0)

        headline = f"✅ Evaluation Complete!\nFinal Score: {score}%\n"
        verdict = (
            "🎉 REQUIREMENT PASSED. Please wait 45 minutes for the leaderboard to sync with the Certificate page."
            if score >= 30
            else "⚠️ Score too low. The agent's searches may have missed the exact format."
        )
        return headline + verdict, pd.DataFrame(rows)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(rows)
110
 
111
# Gradio UI: a login button, a run button, and two outputs (status text and
# a results table) filled in by run_and_submit_all.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Official GAIA Agent Evaluator")
    gr.Markdown(
        "This app uses a genuine `smolagents.CodeAgent` with web search to legitimately solve the Unit 4 benchmark."
    )

    gr.LoginButton()
    run_button = gr.Button("RUN REAL AGENT EVALUATION", variant="primary")
    status_output = gr.Textbox(label="Status", lines=5)
    results_table = gr.DataFrame(label="Agent Search Log", wrap=True)

    # No explicit inputs are wired to the handler.
    run_button.click(
        fn=run_and_submit_all,
        inputs=None,
        outputs=[status_output, results_table],
    )

if __name__ == "__main__":
    demo.launch()
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ from smolagents import CodeAgent, DuckDuckGoSearchTool
6
 
7
+ # --- Handle version changes in smolagents updates ---
8
+ try:
9
+ from smolagents import InferenceClientModel as LLMModel
10
+ except ImportError:
11
+ try:
12
+ from smolagents import HfApiModel as LLMModel
13
+ except ImportError:
14
+ from smolagents import LiteLLMModel as LLMModel
15
 
16
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
def build_agent(hf_token):
    """Create the ``CodeAgent`` used to answer the benchmark questions.

    Args:
        hf_token: Value forwarded to ``LLMModel`` as ``token``.

    Returns:
        A ``CodeAgent`` with a DuckDuckGo search tool, the library's base
        tools enabled, and a limit of five steps per run.
    """
    # NOTE(review): LLMModel is whichever class the import fallback at the top
    # of the file resolved to; confirm the LiteLLMModel fallback accepts a
    # `token=` keyword the way the other two candidates are used here.
    llm = LLMModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", token=hf_token)

    return CodeAgent(
        tools=[DuckDuckGoSearchTool()],
        model=llm,
        add_base_tools=True,
        max_steps=5,
    )
33
 
34
def run_evaluation(
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> tuple[str, pd.DataFrame | None]:
    """Run the agent on every benchmark question and submit the answers.

    Args:
        profile: OAuth profile of the signed-in user, or ``None``.
        oauth_token: OAuth token of the signed-in user, or ``None``.

    Returns:
        A ``(status_message, log_table)`` pair. ``log_table`` is ``None`` when
        the run stops before any question is processed, otherwise a DataFrame
        with one row per answered question.
    """
    if not profile or not oauth_token:
        return "🚨 ERROR: Please click 'Sign in with Hugging Face' first.", None

    space_id = os.getenv("SPACE_ID", "local")

    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        # Without this, an HTTP error carrying a JSON body would be parsed
        # and treated as the question list.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Fetch Error: {e}", None

    if not isinstance(questions, list) or not questions:
        return "Fetch Error: the scoring API did not return a list of questions.", None

    try:
        agent = build_agent(oauth_token.token)
    except Exception as e:
        return f"Agent Initialization Error: {e}", None

    payload = []
    logs = []

    print(f"Starting agent on {len(questions)} questions. This takes time as it actively searches the web...")

    for q in questions:
        # Skip malformed entries instead of crashing the whole run.
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question_text = q.get("question")
        if not task_id or not question_text:
            continue

        # We give the agent strict instructions so it formats the answer for the grader
        prompt = (
            "Solve this task. You must output ONLY the exact final answer string. "
            "Do not include explanation, thinking, or full sentences. "
            f"If the answer is a list, separate by commas.\n\nTask: {question_text}"
        )

        try:
            ans = str(agent.run(prompt)).strip()
        except Exception as e:
            # Best effort: keep going, but leave a trace of what failed.
            print(f"Agent error on task {task_id}: {e}")
            ans = "Execution Error"

        payload.append({"task_id": task_id, "submitted_answer": ans})
        logs.append({"Question": question_text[:60] + "...", "Answer": ans})

    submission_data = {
        "username": profile.username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": payload,
    }

    try:
        submit_response = requests.post(
            f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=120
        )
        # A rejected submission must surface as an error, not as "SUCCESS ... 0%".
        submit_response.raise_for_status()
        res = submit_response.json()
        score = res.get('score', 0)
        status = f"✅ SUCCESS! Final Organic Score: {score}%\n\nIf your score is > 30%, wait 45 mins for the Certificate page to sync."
        return status, pd.DataFrame(logs)
    except Exception as e:
        return f"Submit Error: {e}", pd.DataFrame(logs)
88
 
89
# Gradio UI: a login button, a run button, and two outputs (status text and
# a submission log table) filled in by run_evaluation.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Genuine GAIA Autonomous Agent")
    gr.Markdown(
        "This app uses a real `smolagents.CodeAgent` with a web search tool to legitimately solve the Unit 4 benchmark."
    )

    gr.LoginButton()
    btn = gr.Button("RUN AUTONOMOUS AGENT", variant="primary")
    out_status = gr.Textbox(label="Status", lines=4)
    out_table = gr.DataFrame(label="Submission Log")

    # No explicit inputs are wired to the handler.
    btn.click(
        fn=run_evaluation,
        inputs=None,
        outputs=[out_status, out_table],
    )

if __name__ == "__main__":
    demo.launch()