orbulat committed on
Commit
63466ea
·
verified ·
1 Parent(s): 7da046a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -251
app.py CHANGED
@@ -1,266 +1,194 @@
1
- # --- Basic Agent Definition ---
2
- import asyncio
3
  import os
4
- import sys
5
- import logging
6
- import random
7
- import pandas as pd
8
  import requests
9
- import wikipedia as wiki
10
- from markdownify import markdownify as to_markdown
11
- from typing import Any
12
- from dotenv import load_dotenv
13
- from google.generativeai import types, configure
14
-
15
- from smolagents import InferenceClientModel, LiteLLMModel, ToolCallingAgent, Tool, DuckDuckGoSearchTool
16
-
17
- # Load environment and configure Gemini
18
- load_dotenv()
19
- configure(api_key=os.getenv("GOOGLE_API_KEY"))
20
-
21
- # Logging
22
- #logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
23
- #logger = logging.getLogger(__name__)
24
-
25
- # --- Model Configuration ---
26
- GEMINI_MODEL_NAME = "gemini/gemini-2.0-flash"
27
- OPENAI_MODEL_NAME = "openai/gpt-4o"
28
- GROQ_MODEL_NAME = "groq/llama3-70b-8192"
29
- DEEPSEEK_MODEL_NAME = "deepseek/deepseek-chat"
30
- HF_MODEL_NAME = "Qwen/Qwen2.5-Coder-32B-Instruct"
31
-
32
- # --- Tool Definitions ---
33
- class MathSolver(Tool):
34
- name = "math_solver"
35
- description = "Safely evaluate basic math expressions."
36
- inputs = {"input": {"type": "string", "description": "Math expression to evaluate."}}
37
- output_type = "string"
38
-
39
- def forward(self, input: str) -> str:
40
- try:
41
- return str(eval(input, {"__builtins__": {}}))
42
- except Exception as e:
43
- return f"Math error: {e}"
44
-
45
- class RiddleSolver(Tool):
46
- name = "riddle_solver"
47
- description = "Solve basic riddles using logic."
48
- inputs = {"input": {"type": "string", "description": "Riddle prompt."}}
49
- output_type = "string"
50
-
51
- def forward(self, input: str) -> str:
52
- if "forward" in input and "backward" in input:
53
- return "A palindrome"
54
- return "RiddleSolver failed."
55
-
56
- class TextTransformer(Tool):
57
- name = "text_ops"
58
- description = "Transform text: reverse, upper, lower."
59
- inputs = {"input": {"type": "string", "description": "Use prefix like reverse:/upper:/lower:"}}
60
- output_type = "string"
61
-
62
- def forward(self, input: str) -> str:
63
- if input.startswith("reverse:"):
64
- reversed_text = input[8:].strip()[::-1]
65
- if 'left' in reversed_text.lower():
66
- return "right"
67
- return reversed_text
68
- if input.startswith("upper:"):
69
- return input[6:].strip().upper()
70
- if input.startswith("lower:"):
71
- return input[6:].strip().lower()
72
- return "Unknown transformation."
73
-
74
- class GeminiVideoQA(Tool):
75
- name = "video_inspector"
76
- description = "Analyze video content to answer questions."
77
- inputs = {
78
- "video_url": {"type": "string", "description": "URL of video."},
79
- "user_query": {"type": "string", "description": "Question about video."}
80
- }
81
- output_type = "string"
82
-
83
- def __init__(self, model_name, *args, **kwargs):
84
- super().__init__(*args, **kwargs)
85
- self.model_name = model_name
86
-
87
- def forward(self, video_url: str, user_query: str) -> str:
88
- req = {
89
- 'model': f'models/{self.model_name}',
90
- 'contents': [{
91
- "parts": [
92
- {"fileData": {"fileUri": video_url}},
93
- {"text": f"Please watch the video and answer the question: {user_query}"}
94
- ]
95
- }]
96
- }
97
- url = f'https://generativelanguage.googleapis.com/v1beta/models/{self.model_name}:generateContent?key={os.getenv("GOOGLE_API_KEY")}'
98
- res = requests.post(url, json=req, headers={'Content-Type': 'application/json'})
99
- if res.status_code != 200:
100
- return f"Video error {res.status_code}: {res.text}"
101
- parts = res.json()['candidates'][0]['content']['parts']
102
- return "".join([p.get('text', '') for p in parts])
103
-
104
- class WikiTitleFinder(Tool):
105
- name = "wiki_titles"
106
- description = "Search for related Wikipedia page titles."
107
- inputs = {"query": {"type": "string", "description": "Search query."}}
108
- output_type = "string"
109
-
110
- def forward(self, query: str) -> str:
111
- results = wiki.search(query)
112
- return ", ".join(results) if results else "No results."
113
-
114
- class WikiContentFetcher(Tool):
115
- name = "wiki_page"
116
- description = "Fetch Wikipedia page content."
117
- inputs = {"page_title": {"type": "string", "description": "Wikipedia page title."}}
118
- output_type = "string"
119
-
120
- def forward(self, page_title: str) -> str:
121
- try:
122
- return to_markdown(wiki.page(page_title).html())
123
- except wiki.exceptions.PageError:
124
- return f"'{page_title}' not found."
125
-
126
- class FileAttachmentQueryTool(Tool):
127
- name = "run_query_with_file"
128
- description = """
129
- Downloads a file mentioned in a user prompt, adds it to the context, and runs a query on it.
130
- This assumes the file is 20MB or less.
131
- """
132
- inputs = {
133
- "task_id": {
134
- "type": "string",
135
- "description": "A unique identifier for the task related to this file, used to download it."
136
- },
137
- "mime_type": {
138
- "type": "string",
139
- "nullable": True,
140
- "description": "The MIME type of the file, or the best guess if unknown."
141
- },
142
- "user_query": {
143
- "type": "string",
144
- "description": "The question to answer about the file."
145
- }
146
- }
147
- output_type = "string"
148
-
149
- def forward(self, task_id: str, mime_type: str | None, user_query: str) -> str:
150
- file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
151
- file_response = requests.get(file_url)
152
- if file_response.status_code != 200:
153
- return f"Failed to download file: {file_response.status_code} - {file_response.text}"
154
- file_data = file_response.content
155
- mime_type = mime_type or file_response.headers.get('Content-Type', 'application/octet-stream')
156
-
157
- from google.generativeai import GenerativeModel
158
- model = GenerativeModel(self.model_name)
159
- response = model.generate_content([
160
- types.Part.from_bytes(data=file_data, mime_type=mime_type),
161
- user_query
162
- ])
163
 
164
- return response.text
 
 
165
 
166
  # --- Basic Agent Definition ---
 
167
  class BasicAgent:
168
- def __init__(self, provider="deepseek"):
169
  print("BasicAgent initialized.")
170
- model = self.select_model(provider)
171
- client = InferenceClientModel()
172
- tools = [
173
- DuckDuckGoSearchTool(),
174
- GeminiVideoQA(GEMINI_MODEL_NAME),
175
- WikiTitleFinder(),
176
- WikiContentFetcher(),
177
- MathSolver(),
178
- RiddleSolver(),
179
- TextTransformer(),
180
- FileAttachmentQueryTool(model_name=GEMINI_MODEL_NAME),
181
- ]
182
- self.agent = ToolCallingAgent(
183
- model=model,
184
- tools=tools,
185
- add_base_tools=False,
186
- max_steps=10,
187
- )
188
- self.agent.system_prompt = (
189
- """
190
- You are a GAIA benchmark AI assistant. Your sole purpose is to provide exact, minimal answers in the format 'FINAL ANSWER: [ANSWER]' with no additional text, explanations, or comments.
191
-
192
- - If the answer is a number, use numerals (e.g., '42', not 'forty-two'), without commas or units (e.g., no '$', '%') unless explicitly requested.
193
- - If the answer is a string, use no articles ('a', 'the'), no abbreviations (e.g., 'New York', not 'NY'), and write digits as text (e.g., 'one', not '1') unless specified.
194
- - For comma-separated lists, apply the above rules to each element based on whether it's a number or string.
195
- - Answer as literally as possible, making minimal assumptions and adhering to the question's narrowest interpretation.
196
- - For videos, analyze the entire content but extract only the precise answer to the query, ignoring irrelevant details.
197
- - For Wikipedia or search tools, distill results to the minimal correct answer, ignoring extraneous content.
198
- - If proving something, compute step-by-step internally but output only the final result in the required format.
199
- - If tool outputs are verbose, extract only the essential answer that satisfies the question.
200
- - Under no circumstances include explanations, intermediate steps, or text outside the 'FINAL ANSWER: [ANSWER]' format.
201
-
202
- Example:
203
- Question: What is 2 + 2?
204
- Response: FINAL ANSWER: 4
205
-
206
- Your response must always be:
207
- FINAL ANSWER: [ANSWER]
208
- """
209
- )
210
-
211
- def select_model(self, provider: str):
212
- if provider == "openai":
213
- return LiteLLMModel(model_id=OPENAI_MODEL_NAME, api_key=os.getenv("OPENAI_API_KEY"))
214
- elif provider == "groq":
215
- return LiteLLMModel(model_id=GROQ_MODEL_NAME, api_key=os.getenv("GROQ_API_KEY"))
216
- elif provider == "deepseek":
217
- return LiteLLMModel(model_id=DEEPSEEK_MODEL_NAME, api_key=os.getenv("DEEPSEEK_API_KEY"))
218
- elif provider == "hf":
219
- return InferenceClientModel()
220
- else:
221
- return LiteLLMModel(model_id=GEMINI_MODEL_NAME, api_key=os.getenv("GOOGLE_API_KEY"))
222
-
223
  def __call__(self, question: str) -> str:
224
  print(f"Agent received question (first 50 chars): {question[:50]}...")
225
- result = self.agent.run(question)
226
- if isinstance(result, dict) and "final_answer" in result and isinstance(result["final_answer"], str):
227
- final_str = result["final_answer"].strip()
228
- else:
229
- final_str = str(result).strip()
230
 
231
- return f"FINAL ANSWER: {final_str}"
 
 
 
 
 
 
232
 
233
- def evaluate_random_questions(self, csv_path: str = "gaia_qa.csv", sample_size: int = 3, show_steps: bool = True):
234
- df = pd.read_csv(csv_path)
235
- if not {"question", "answer"}.issubset(df.columns):
236
- print("CSV must contain 'question' and 'answer' columns.")
237
- print("Found columns:", df.columns.tolist())
238
- return
239
- samples = df.sample(n=sample_size)
240
- for _, row in samples.iterrows():
241
- question = row["question"].strip()
242
- expected = f"FINAL ANSWER: {str(row['answer']).strip()}"
243
- result = self(question).strip()
244
- if show_steps:
245
- print("---")
246
- print("Question:", question)
247
- print("Expected:", expected)
248
- print("Agent:", result)
249
- print("Correct:", expected == result)
250
- else:
251
- print(f"Q: {question}\nE: {expected}\nA: {result}\n✓: {expected == result}\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
  if __name__ == "__main__":
254
- args = sys.argv[1:]
255
- if not args or args[0] in {"-h", "--help"}:
256
- print("Usage: python agent.py [question | dev]")
257
- print(" - Provide a question to get a GAIA-style answer.")
258
- print(" - Use 'dev' to evaluate 3 random GAIA questions from gaia_qa.csv.")
259
- sys.exit(0)
 
 
 
 
260
 
261
- q = " ".join(args)
262
- agent = BasicAgent()
263
- if q == "dev":
264
- agent.evaluate_random_questions()
265
  else:
266
- print(agent(q))
 
 
 
 
 
 
 
 
1
  import os
2
+ import gradio as gr
 
 
 
3
  import requests
4
+ import inspect
5
+ import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # (Keep Constants as is)
8
+ # --- Constants ---
9
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
  # --- Basic Agent Definition ---
12
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
+ def __init__(self):
15
  print("BasicAgent initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
+ fixed_answer = "This is a default answer."
19
+ print(f"Agent returning fixed answer: {fixed_answer}")
20
+ return fixed_answer
 
 
21
 
22
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
23
+ """
24
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
25
+ and displays the results.
26
+ """
27
+ # --- Determine HF Space Runtime URL and Repo URL ---
28
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
+ if profile:
31
+ username= f"{profile.username}"
32
+ print(f"User logged in: {username}")
33
+ else:
34
+ print("User not logged in.")
35
+ return "Please Login to Hugging Face with the button.", None
36
+
37
+ api_url = DEFAULT_API_URL
38
+ questions_url = f"{api_url}/questions"
39
+ submit_url = f"{api_url}/submit"
40
+
41
+ # 1. Instantiate Agent (modify this part to create your agent)
42
+ try:
43
+ agent = BasicAgent()
44
+ except Exception as e:
45
+ print(f"Error instantiating agent: {e}")
46
+ return f"Error initializing agent: {e}", None
47
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
48
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
+ print(agent_code)
50
+
51
+ # 2. Fetch Questions
52
+ print(f"Fetching questions from: {questions_url}")
53
+ try:
54
+ response = requests.get(questions_url, timeout=15)
55
+ response.raise_for_status()
56
+ questions_data = response.json()
57
+ if not questions_data:
58
+ print("Fetched questions list is empty.")
59
+ return "Fetched questions list is empty or invalid format.", None
60
+ print(f"Fetched {len(questions_data)} questions.")
61
+ except requests.exceptions.RequestException as e:
62
+ print(f"Error fetching questions: {e}")
63
+ return f"Error fetching questions: {e}", None
64
+ except requests.exceptions.JSONDecodeError as e:
65
+ print(f"Error decoding JSON response from questions endpoint: {e}")
66
+ print(f"Response text: {response.text[:500]}")
67
+ return f"Error decoding server response for questions: {e}", None
68
+ except Exception as e:
69
+ print(f"An unexpected error occurred fetching questions: {e}")
70
+ return f"An unexpected error occurred fetching questions: {e}", None
71
+
72
+ # 3. Run your Agent
73
+ results_log = []
74
+ answers_payload = []
75
+ print(f"Running agent on {len(questions_data)} questions...")
76
+ for item in questions_data:
77
+ task_id = item.get("task_id")
78
+ question_text = item.get("question")
79
+ if not task_id or question_text is None:
80
+ print(f"Skipping item with missing task_id or question: {item}")
81
+ continue
82
+ try:
83
+ submitted_answer = agent(question_text)
84
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
+ except Exception as e:
87
+ print(f"Error running agent on task {task_id}: {e}")
88
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
+
90
+ if not answers_payload:
91
+ print("Agent did not produce any answers to submit.")
92
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
+
94
+ # 4. Prepare Submission
95
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
+ print(status_update)
98
+
99
+ # 5. Submit
100
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
+ try:
102
+ response = requests.post(submit_url, json=submission_data, timeout=60)
103
+ response.raise_for_status()
104
+ result_data = response.json()
105
+ final_status = (
106
+ f"Submission Successful!\n"
107
+ f"User: {result_data.get('username')}\n"
108
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
109
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
+ f"Message: {result_data.get('message', 'No message received.')}"
111
+ )
112
+ print("Submission successful.")
113
+ results_df = pd.DataFrame(results_log)
114
+ return final_status, results_df
115
+ except requests.exceptions.HTTPError as e:
116
+ error_detail = f"Server responded with status {e.response.status_code}."
117
+ try:
118
+ error_json = e.response.json()
119
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
+ except requests.exceptions.JSONDecodeError:
121
+ error_detail += f" Response: {e.response.text[:500]}"
122
+ status_message = f"Submission Failed: {error_detail}"
123
+ print(status_message)
124
+ results_df = pd.DataFrame(results_log)
125
+ return status_message, results_df
126
+ except requests.exceptions.Timeout:
127
+ status_message = "Submission Failed: The request timed out."
128
+ print(status_message)
129
+ results_df = pd.DataFrame(results_log)
130
+ return status_message, results_df
131
+ except requests.exceptions.RequestException as e:
132
+ status_message = f"Submission Failed: Network error - {e}"
133
+ print(status_message)
134
+ results_df = pd.DataFrame(results_log)
135
+ return status_message, results_df
136
+ except Exception as e:
137
+ status_message = f"An unexpected error occurred during submission: {e}"
138
+ print(status_message)
139
+ results_df = pd.DataFrame(results_log)
140
+ return status_message, results_df
141
+
142
+
143
+ # --- Build Gradio Interface using Blocks ---
144
+ with gr.Blocks() as demo:
145
+ gr.Markdown("# Basic Agent Evaluation Runner")
146
+ gr.Markdown(
147
+ """
148
+ **Instructions:**
149
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
150
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
151
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
152
+ ---
153
+ **Disclaimers:**
154
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
155
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
156
+ """
157
+ )
158
+
159
+ gr.LoginButton()
160
+
161
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
162
+
163
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
164
+ # Removed max_rows=10 from DataFrame constructor
165
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
166
+
167
+ run_button.click(
168
+ fn=run_and_submit_all,
169
+ outputs=[status_output, results_table]
170
+ )
171
 
172
  if __name__ == "__main__":
173
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
174
+ # Check for SPACE_HOST and SPACE_ID at startup for information
175
+ space_host_startup = os.getenv("SPACE_HOST")
176
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
177
+
178
+ if space_host_startup:
179
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
180
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
181
+ else:
182
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
183
 
184
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
185
+ print(f"✅ SPACE_ID found: {space_id_startup}")
186
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
187
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
188
  else:
189
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
190
+
191
+ print("-"*(60 + len(" App Starting ")) + "\n")
192
+
193
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
194
+ demo.launch(debug=True, share=False)