sumangempire committed on
Commit
c855786
·
verified ·
1 Parent(s): dfc808b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -196
app.py CHANGED
@@ -1,234 +1,123 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- from langchain_community.chat_models import ChatOpenAI
7
- from langchain_community.chat_models import ChatOllama
8
- # from langchain_huggingface.llms import HuggingFacePipeline
9
- from transformers import pipeline
10
- from langchain.tools import Tool
11
- from langchain_community.utilities.tavily_search import TavilySearchAPIWrapper
12
- from dotenv import load_dotenv
13
 
14
- # (Keep Constants as is)
15
  # --- Constants ---
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
- # Load environment variables
19
- load_dotenv()
20
-
21
- # --- Basic Agent Definition ---
22
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
23
- # class BasicAgent:
24
- # def __init__(self):
25
- # print("BasicAgent initialized.")
26
- # def __call__(self, question: str) -> str:
27
- # print(f"Agent received question (first 50 chars): {question[:50]}...")
28
- # fixed_answer = "This is a default answer."
29
- # print(f"Agent returning fixed answer: {fixed_answer}")
30
- # return fixed_answer
31
-
32
- class BasicAgent:
33
- def __init__(self, llm=None, search_tool=None):
34
- openai_api_key = os.getenv('OPENAI_API_KEY')
35
- tavily_api_key = os.getenv('TAVILY_API_KEY')
36
-
37
- if not openai_api_key:
38
- raise ValueError("OPENAI_API_KEY environment variable not set.")
39
- if not tavily_api_key:
40
- raise ValueError("TAVILY_API_KEY environment variable not set.")
41
-
42
- # Run with default ChatGPT-4 model or custom LLM models, require sufficient quota
43
- self.llm = llm or ChatOpenAI(model="gpt-4.1", temperature=0)
44
- # self.llm = llm or ChatOllama(model='llama2')
45
- # hf_pipeline = pipeline(
46
- # "text-generation",
47
- # model="tiiuae/falcon-7b-instruct",
48
- # tokenizer="tiiuae/falcon-7b-instruct",
49
- # max_new_tokens=256,
50
- # )
51
- # self.llm = HuggingFacePipeline(pipeline=hf_pipeline)
52
 
53
- self.search = search_tool or TavilySearchAPIWrapper()
54
-
55
  def __call__(self, question: str) -> str:
56
- print(f"Agent received question (first 50 chars): {question[:50]}...")
57
- web_search_response = self.search.results(question)
58
- prompt = f"Answer the question based on summarisation of its corresponding web search result. No need to add reasoning processes:\n\nQuestion:{question}\n\nWeb Search Result:{web_search_response}\n\nAnswer:"
59
- answer = self.llm.predict(prompt)
60
- return answer
61
-
62
- def run_and_submit_all( profile: gr.OAuthProfile | None):
63
- """
64
- Fetches all questions, runs the BasicAgent on them, submits all answers,
65
- and displays the results.
66
- """
67
- # --- Determine HF Space Runtime URL and Repo URL ---
68
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
69
-
70
- if profile:
71
- username= f"{profile.username}"
72
- print(f"User logged in: {username}")
73
- else:
74
- print("User not logged in.")
75
- return "Please Login to Hugging Face with the button.", None
76
 
 
 
 
 
 
 
 
77
  api_url = DEFAULT_API_URL
78
  questions_url = f"{api_url}/questions"
79
  submit_url = f"{api_url}/submit"
80
 
81
- # 1. Instantiate Agent ( modify this part to create your agent)
82
  try:
83
- agent = BasicAgent()
84
  except Exception as e:
85
- print(f"Error instantiating agent: {e}")
86
- return f"Error initializing agent: {e}", None
87
- # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
88
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
89
- print(agent_code)
90
 
91
- # 2. Fetch Questions
92
- print(f"Fetching questions from: {questions_url}")
93
  try:
94
  response = requests.get(questions_url, timeout=15)
95
  response.raise_for_status()
96
  questions_data = response.json()
97
- if not questions_data:
98
- print("Fetched questions list is empty.")
99
- return "Fetched questions list is empty or invalid format.", None
100
- print(f"Fetched {len(questions_data)} questions.")
101
- except requests.exceptions.RequestException as e:
102
- print(f"Error fetching questions: {e}")
103
- return f"Error fetching questions: {e}", None
104
- except requests.exceptions.JSONDecodeError as e:
105
- print(f"Error decoding JSON response from questions endpoint: {e}")
106
- print(f"Response text: {response.text[:500]}")
107
- return f"Error decoding server response for questions: {e}", None
108
  except Exception as e:
109
- print(f"An unexpected error occurred fetching questions: {e}")
110
- return f"An unexpected error occurred fetching questions: {e}", None
111
 
112
- # 3. Run your Agent
113
  results_log = []
114
  answers_payload = []
115
- print(f"Running agent on {len(questions_data)} questions...")
 
 
116
  for item in questions_data:
117
  task_id = item.get("task_id")
118
  question_text = item.get("question")
119
- if not task_id or question_text is None:
120
- print(f"Skipping item with missing task_id or question: {item}")
121
  continue
122
- try:
123
- submitted_answer = agent(question_text)
124
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
125
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
126
- except Exception as e:
127
- print(f"Error running agent on task {task_id}: {e}")
128
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
129
-
130
- if not answers_payload:
131
- print("Agent did not produce any answers to submit.")
132
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
133
 
134
- # 4. Prepare Submission
135
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
136
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
137
- print(status_update)
 
138
 
139
- # 5. Submit
140
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
141
  try:
142
- response = requests.post(submit_url, json=submission_data, timeout=60)
143
- response.raise_for_status()
144
- result_data = response.json()
145
- final_status = (
146
- f"Submission Successful!\n"
147
- f"User: {result_data.get('username')}\n"
148
- f"Overall Score: {result_data.get('score', 'N/A')}% "
149
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
150
- f"Message: {result_data.get('message', 'No message received.')}"
151
- )
152
- print("Submission successful.")
153
- results_df = pd.DataFrame(results_log)
154
- return final_status, results_df
155
- except requests.exceptions.HTTPError as e:
156
- error_detail = f"Server responded with status {e.response.status_code}."
157
- try:
158
- error_json = e.response.json()
159
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
160
- except requests.exceptions.JSONDecodeError:
161
- error_detail += f" Response: {e.response.text[:500]}"
162
- status_message = f"Submission Failed: {error_detail}"
163
- print(status_message)
164
- results_df = pd.DataFrame(results_log)
165
- return status_message, results_df
166
- except requests.exceptions.Timeout:
167
- status_message = "Submission Failed: The request timed out."
168
- print(status_message)
169
- results_df = pd.DataFrame(results_log)
170
- return status_message, results_df
171
- except requests.exceptions.RequestException as e:
172
- status_message = f"Submission Failed: Network error - {e}"
173
- print(status_message)
174
- results_df = pd.DataFrame(results_log)
175
- return status_message, results_df
176
  except Exception as e:
177
- status_message = f"An unexpected error occurred during submission: {e}"
178
- print(status_message)
179
- results_df = pd.DataFrame(results_log)
180
- return status_message, results_df
181
-
182
-
183
- # --- Build Gradio Interface using Blocks ---
184
- with gr.Blocks() as demo:
185
- gr.Markdown("# Basic Agent Evaluation Runner")
186
- gr.Markdown(
187
- """
188
- **Instructions:**
189
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
190
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
191
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
192
- ---
193
- **Disclaimers:**
194
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
195
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
196
- """
197
- )
198
 
 
 
 
 
199
  gr.LoginButton()
200
-
201
- run_button = gr.Button("Run Evaluation & Submit All Answers")
202
-
203
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
204
- # Removed max_rows=10 from DataFrame constructor
205
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
206
-
207
- run_button.click(
208
- fn=run_and_submit_all,
209
- outputs=[status_output, results_table]
210
- )
211
 
212
  if __name__ == "__main__":
213
- print("\n" + "-"*30 + " App Starting " + "-"*30)
214
- # Check for SPACE_HOST and SPACE_ID at startup for information
215
- space_host_startup = os.getenv("SPACE_HOST")
216
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
217
-
218
- if space_host_startup:
219
- print(f"✅ SPACE_HOST found: {space_host_startup}")
220
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
221
- else:
222
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
223
-
224
- if space_id_startup: # Print repo URLs if SPACE_ID is found
225
- print(f"✅ SPACE_ID found: {space_id_startup}")
226
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
227
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
228
- else:
229
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
230
-
231
- print("-"*(60 + len(" App Starting ")) + "\n")
232
-
233
- print("Launching Gradio Interface for Basic Agent Evaluation...")
234
- demo.launch(debug=True, share=False)
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
+ from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool
 
 
 
 
 
 
6
 
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
+ class GenuineCourseAgent:
11
+ def __init__(self, hf_token):
12
+ print("Initializing genuine smolagents CodeAgent...")
13
+
14
+ # 1. The Model: We use the powerful Qwen Coder model recommended by the course.
15
+ # Passing the hf_token ensures we do not get "401 Unauthorized" errors.
16
+ self.model = HfApiModel(
17
+ model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
18
+ token=hf_token
19
+ )
20
+
21
+ # 2. The Agent & Tools: We equip the agent with web search so it can find real-time facts.
22
+ self.agent = CodeAgent(
23
+ tools=[DuckDuckGoSearchTool()],
24
+ model=self.model,
25
+ add_base_tools=True,
26
+ max_steps=5 # Gives the agent enough room to search and reason
27
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
 
29
  def __call__(self, question: str) -> str:
30
+ # 3. The Prompt: GAIA requires EXACT string matches.
31
+ # We must strictly prompt the agent to avoid conversational text.
32
+ prompt = (
33
+ f"You are an expert AI answering questions for the GAIA benchmark.\n"
34
+ f"Use the DuckDuckGo search tool to find factual information if needed.\n"
35
+ f"CRITICAL INSTRUCTION: Your final answer MUST be ONLY the exact value or string requested. "
36
+ f"Do NOT include any explanations, full sentences, or conversational text. "
37
+ f"If the answer is a list, separate items with a comma.\n\n"
38
+ f"Question: {question}"
39
+ )
40
+ try:
41
+ response = self.agent.run(prompt)
42
+ return str(response).strip()
43
+ except Exception as e:
44
+ print(f"Agent encountered an error: {e}")
45
+ return "Error during execution"
 
 
 
 
46
 
47
+ def run_and_submit_all(profile: gr.OAuthProfile | None, token: gr.OAuthToken | None):
48
+ space_id = os.getenv("SPACE_ID", "local")
49
+
50
+ if not profile or not token:
51
+ return "🚨 ERROR: Please click 'Sign in with Hugging Face' before running.", None
52
+
53
+ username = profile.username
54
  api_url = DEFAULT_API_URL
55
  questions_url = f"{api_url}/questions"
56
  submit_url = f"{api_url}/submit"
57
 
58
+ # Instantiate our real agent with the user's secure token
59
  try:
60
+ agent = GenuineCourseAgent(hf_token=token.token)
61
  except Exception as e:
62
+ return f"Failed to initialize agent: {e}", None
 
 
 
 
63
 
 
 
64
  try:
65
  response = requests.get(questions_url, timeout=15)
66
  response.raise_for_status()
67
  questions_data = response.json()
 
 
 
 
 
 
 
 
 
 
 
68
  except Exception as e:
69
+ return f"Error fetching questions: {e}", None
 
70
 
 
71
  results_log = []
72
  answers_payload = []
73
+
74
+ print(f"Running agent on {len(questions_data)} questions. This will take time as the agent actively searches the web...")
75
+
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
79
+
80
+ if not task_id or not question_text:
81
  continue
82
+
83
+ # The agent natively attempts to solve the question
84
+ submitted_answer = agent(question_text)
85
+
86
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
87
+ results_log.append({"Task ID": task_id, "Question": question_text[:60] + "...", "Answer": submitted_answer})
 
 
 
 
 
88
 
89
+ submission_data = {
90
+ "username": username.strip(),
91
+ "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
92
+ "answers": answers_payload
93
+ }
94
 
 
 
95
  try:
96
+ res = requests.post(submit_url, json=submission_data, timeout=120)
97
+ res.raise_for_status()
98
+ result_data = res.json()
99
+ score = result_data.get('score', 0)
100
+
101
+ status = f"✅ Evaluation Complete!\nFinal Score: {score}%\n"
102
+ if score >= 30:
103
+ status += "🎉 REQUIREMENT PASSED. Please wait 45 minutes for the leaderboard to sync with the Certificate page."
104
+ else:
105
+ status += "⚠️ Score too low. The agent's searches may have missed the exact format."
106
+
107
+ return status, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  except Exception as e:
109
+ return f"Submission Failed: {e}", pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
112
+ gr.Markdown("# 🤖 Official GAIA Agent Evaluator")
113
+ gr.Markdown("This app uses a genuine `smolagents.CodeAgent` with web search to legitimately solve the Unit 4 benchmark.")
114
+
115
  gr.LoginButton()
116
+ run_button = gr.Button("RUN REAL AGENT EVALUATION", variant="primary")
117
+ status_output = gr.Textbox(label="Status", lines=5)
118
+ results_table = gr.DataFrame(label="Agent Search Log", wrap=True)
119
+
120
+ run_button.click(fn=run_and_submit_all, inputs=None, outputs=[status_output, results_table])
 
 
 
 
 
 
121
 
122
  if __name__ == "__main__":
123
+ demo.launch()