SantoshKumar1310 committed on
Commit
3075801
·
verified ·
1 Parent(s): 2889cb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -138
app.py CHANGED
@@ -1,155 +1,138 @@
1
- # app.py — Final GAIA Assignment Template (Enhanced)
2
-
3
- import streamlit as st
4
- from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonREPLTool, HfApiModel
5
- from huggingface_hub import login
6
- import json
7
- import time
8
  import os
 
 
 
 
 
 
9
 
10
# =========================
# 1. Define the GAIA Agent
# =========================
class BasicAgent:
    """Question-answering agent wrapping a smolagents ``CodeAgent``.

    The agent is built from a Hugging Face hosted model plus a search tool and
    a Python REPL tool. Calling the instance runs one question through the
    agent and returns a cleaned-up answer string.
    """

    # Labels stripped from the start of a raw answer, checked in this order.
    _ANSWER_PREFIXES = ("FINAL ANSWER:", "Final Answer:", "Answer:", "answer:")

    def __init__(self):
        """Create the model, the tool list and the underlying ``CodeAgent``."""
        st.write("πŸ”§ Initializing enhanced GAIA Agent...")

        # Core model from Hugging Face
        self.model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")

        # Tools for reasoning and search
        # NOTE(review): confirm both tool classes exist in the installed
        # smolagents version.
        self.tools = [
            DuckDuckGoSearchTool(),
            PythonREPLTool(),
        ]

        # Create a CodeAgent instance
        self.agent = CodeAgent(
            tools=self.tools,
            model=self.model,
            name="GAIA_Level1_Agent",
            description="Hybrid reasoning agent using web + code execution to answer GAIA L1 questions.",
            max_steps=5,
        )

    def sanitize(self, text: str) -> str:
        """Clean and simplify a raw agent output for benchmark scoring.

        Args:
            text: Raw value produced by the agent. Non-string values (for
                example numbers) are converted with ``str`` first.

        Returns:
            The answer with known leading labels removed, one pair of
            surrounding double quotes removed, and runs of whitespace
            collapsed to single spaces. ``None`` and blank input give ``""``.
        """
        if text is None:
            return ""
        # The agent run may hand back a non-string final answer; coerce it so
        # a numeric answer (including 0) is kept instead of being lost.
        text = str(text).strip()
        for prefix in self._ANSWER_PREFIXES:
            if text.startswith(prefix):
                text = text[len(prefix):].strip()
        if text.startswith('"') and text.endswith('"'):
            text = text[1:-1]
        return " ".join(text.split())

    def __call__(self, question: str) -> str:
        """Run the agent on a single GAIA question.

        Args:
            question: The question text; its first 80 characters are echoed
                to the page.

        Returns:
            The sanitized answer, or ``"N/A"`` when the answer is empty or
            the agent run raised an exception.
        """
        st.write(f"πŸ€– Running agent on: {question[:80]}...")
        prompt = (
            "You are a concise reasoning agent. "
            "Use your tools to find accurate answers. "
            "Always return only the final answer (no explanations).\n\n"
            f"Question: {question}"
        )

        try:
            response = self.agent.run(prompt)
            clean_answer = self.sanitize(response)
            st.write(f"βœ… Final Answer: {clean_answer}")
            return clean_answer or "N/A"
        except Exception as e:
            # Best-effort boundary: one failing question must not stop the run.
            st.error(f"⚠️ Agent failed: {e}")
            return "N/A"
66
 
67
 
68
# =======================================
# 2. Streamlit UI and GAIA Leaderboard
# =======================================
# Set the page title/layout, then render the heading and the introductory
# markdown (course context, pass threshold, and the three usage steps).
st.set_page_config(page_title="GAIA Final Assignment", layout="centered")

st.title("πŸ€– GAIA Benchmark Final Assignment")
st.markdown(
    """
Welcome to your **Final Assignment** for the Agents course!

This app evaluates your custom agent on a subset of **GAIA Level 1** benchmark questions.
To pass and earn your certificate πŸ…, your agent must score **β‰₯ 30% accuracy**.

---

### 🧠 Steps
1. Log in to your **Hugging Face** account.
2. Run your **agent** on the GAIA dataset.
3. Automatically submit your results for scoring.

---
"""
)
91
 
92
# =========================
# 3. Login Section
# =========================
# The token field is masked; the login call only runs when the button is
# pressed, and any failure is shown on the page instead of raising.
hf_token = st.text_input("πŸ”‘ Enter your Hugging Face access token:", type="password")
login_requested = st.button("Login to Hugging Face")
if login_requested:
    try:
        login(token=hf_token)
        st.success("βœ… Logged in successfully!")
    except Exception as login_error:
        st.error(f"Login failed: {login_error}")
102
-
103
# =========================
# 4. Load GAIA Questions
# =========================
# Download questions.json next to the app when the button is pressed.
if st.button("🧩 Load GAIA Dataset"):
    st.info("Fetching 20 GAIA Level 1 questions...")
    # os.system returns the command's exit status; anything but 0 is a failure.
    exit_status = os.system("wget -q https://huggingface.co/spaces/agents-course/Final_Assignment_Template/resolve/main/questions.json -O questions.json")
    if exit_status == 0:
        st.success("βœ… Dataset loaded!")
    else:
        # "wget -O" can leave an empty file behind on failure; remove it so the
        # evaluation step does not mistake it for a loaded dataset.
        if os.path.exists("questions.json"):
            os.remove("questions.json")
        st.error(f"Failed to download the GAIA dataset (exit status {exit_status}).")
110
-
111
# =========================
# 5. Run Evaluation
# =========================
# On button press: read questions.json, answer every question with a fresh
# BasicAgent, write answers.json, then invoke the submit command.
if st.button("πŸš€ Run Evaluation & Submit All Answers"):
    questions = None
    if not os.path.exists("questions.json"):
        st.warning("Please load the GAIA dataset first.")
    else:
        try:
            with open("questions.json", "r", encoding="utf-8") as f:
                data = json.load(f)
            questions = data["questions"]
        except (OSError, ValueError, KeyError, TypeError) as e:
            # A truncated or unexpected file is reported instead of crashing.
            st.error(f"Could not read questions.json: {e}")

    if questions is not None:
        agent = BasicAgent()
        results = {}

        # NOTE(review): each question is used as a dict key below, so entries
        # are assumed to be hashable (e.g. plain strings) — confirm against
        # the actual questions.json schema.
        for number, q in enumerate(questions, start=1):
            st.write(f"### Question {number}:")
            st.write(q)
            ans = agent(q)
            results[q] = ans
            time.sleep(1)  # pause between questions

        # Save answers
        with open("answers.json", "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)
        st.success("βœ… All questions answered and saved as answers.json")

        # Auto-submit via huggingface CLI (if supported)
        st.info("πŸ“€ Submitting answers to GAIA leaderboard...")
        exit_status = os.system("python3 -m smolagents.eval_gaia submit answers.json")
        if exit_status == 0:
            st.success("πŸŽ‰ Submission complete! Check your score on the leaderboard.")
        else:
            # Do not claim success when the submit command itself failed.
            st.error(f"Submission command failed (exit status {exit_status}).")
141
-
142
# =========================
# 6. Notes
# =========================
# Static footer with tips for improving and publishing the agent.
st.markdown(
    """
---
### ℹ️ Notes
- You can edit the agent logic inside the `BasicAgent` class to boost performance.
- Use more reasoning, examples, or API calls for higher accuracy.
- Make your Space **public** before submitting.

Good luck on the GAIA leaderboard! 🌍
"""
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+
6
+ # --- Constants ---
7
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
8
 
9
# --- Basic Agent Definition ---
# Customize this class with your own logic or tools.
class BasicAgent:
    """Placeholder agent that gives the same canned reply to every question."""

    def __init__(self):
        """Print a line confirming the agent was constructed."""
        print("βœ… BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Log a preview of ``question`` and return the canned reply.

        Args:
            question: The question text; only its first 60 characters are
                printed.

        Returns:
            The fixed default answer string.
        """
        preview = question[:60]
        print(f"🧠 Received question: {preview}...")
        # Default fixed answer (customize this)
        canned_reply = "This is a default answer."
        print(f"πŸ’¬ Returning: {canned_reply}")
        return canned_reply
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the agent, submit answers, and show results.

    Args:
        profile: Profile of the logged-in user, or ``None`` when nobody is
            logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message.
        ``results`` is a ``pandas.DataFrame`` with one row per processed
        question, or ``None`` when the run stopped before the agent was
        applied to any question.
    """
    space_id = os.getenv("SPACE_ID")  # Hugging Face Space ID

    if not profile:
        print("❌ User not logged in.")
        return "Please login to Hugging Face first.", None
    username = profile.username
    print(f"πŸ‘€ User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Create the agent
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Agent initialization failed: {e}", None

    # Link to the code of this Space; falls back to a marker for local runs.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local_Run"
    print(f"πŸ“ Agent code link: {agent_code}")

    # 2. Fetch the questions
    try:
        print("πŸ“‘ Fetching questions...")
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    # A non-list payload (for example a JSON error object) cannot be iterated
    # as question entries, so treat it like an empty list.
    if not questions_data or not isinstance(questions_data, list):
        return "Fetched question list is empty or invalid.", None
    print(f"βœ… Retrieved {len(questions_data)} questions.")

    # 3. Run the agent on every well-formed question
    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            print(f"Skipping malformed question entry: {item!r}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping question entry with missing fields: {item!r}")
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failed question is shown in the table but not submitted.
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
            })
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer,
        })

    if not answers_payload:
        return "No answers generated by the agent.", pd.DataFrame(results_log)

    # 4. Submit the answers
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        print("πŸ“€ Submitting answers...")
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            "βœ… Submission Successful!\n"
            f"πŸ‘€ User: {result_data.get('username')}\n"
            f"🏁 Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"πŸ“ Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        # Include the start of the server's response body: it usually explains
        # why the submission was rejected.
        detail = e.response.text[:500] if e.response is not None else ""
        return f"Submission failed: {e} {detail}".strip(), pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)
110
+
111
+
112
# --- Gradio Interface ---
# Page layout, top to bottom: title, instructions, login button, run button,
# status textbox, results table.
with gr.Blocks() as demo:
    gr.Markdown("# πŸ€– Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        ### Instructions:
        1️⃣ Clone this space on your Hugging Face profile.
        2️⃣ Modify the `BasicAgent` class with your logic.
        3️⃣ Log in below and run evaluation.
        ---
        The process may take time (the agent answers all questions).
        You can customize the agent with reasoning, search tools, or memory.
        """
    )

    gr.LoginButton()
    run_button = gr.Button("πŸš€ Run Evaluation & Submit All Answers")

    # Read-only widgets that receive the two values returned by the handler.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers")

    # No inputs are listed here. NOTE(review): presumably Gradio supplies the
    # handler's `profile` argument from the login session based on its
    # gr.OAuthProfile annotation — confirm against the Gradio OAuth docs.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
134
+
135
+
136
# Launch the UI only when this file is executed as a script.
if __name__ == "__main__":
    print("πŸš€ Launching Gradio Interface...")
    # debug=True and share=False are passed through to Gradio's launch().
    demo.launch(debug=True, share=False)