jonathan9879 committed on
Commit
ee57d8e
·
verified ·
1 Parent(s): 30ab757

native gemini tooling

Browse files
Files changed (1) hide show
  1. app.py +92 -143
app.py CHANGED
@@ -1,166 +1,112 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
- import pandas as pd
6
- import google.generativeai as genai
7
  import re
8
  import time
9
- from google.api_core import exceptions
 
 
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
- MAX_ITERATIONS = 7
14
- MAX_RETRIES = 5
15
 
16
- # --- Tool Definitions (Unchanged) ---
17
class WebSearchTool:
    """Answer a free-text query through the Perplexity chat-completions API.

    Every failure is reported as a string starting with ``Error:`` rather than
    as an exception, so the calling agent can hand it back to the model as an
    observation.
    """

    def __init__(self, api_key):
        self.api_key = api_key
        self.url = "https://api.perplexity.ai/chat/completions"

    @staticmethod
    def _extract_content(data):
        """Return ``data['choices'][0]['message']['content']`` or an error string.

        A decoded body that lacks that path (or whose content is not text)
        yields an ``Error:`` string instead of raising.
        """
        try:
            content = data['choices'][0]['message']['content']
        except (KeyError, IndexError, TypeError):
            return "Error: Unexpected response format from the web search tool."
        if not isinstance(content, str):
            return "Error: Unexpected response format from the web search tool."
        return content

    def execute(self, query: str) -> str:
        """POST *query* to the API and return the reply text or an ``Error:`` string."""
        print(f"Executing WebSearchTool with query: {query}")
        payload = {
            "model": "llama-3-sonar-small-32k-online",
            "messages": [
                {"role": "system", "content": "You are a world-class research assistant. Answer the user's query based on verifiable public information. Be precise and comprehensive."},
                {"role": "user", "content": query},
            ],
        }
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }
        try:
            response = requests.post(self.url, json=payload, headers=headers, timeout=40)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            return f"Error: Could not get a response from the web search tool. {e}"
        return self._extract_content(data)
31
-
32
class FileDownloaderTool:
    """Fetch the file attached to a task from ``{api_url}/files/{task_id}``.

    The file body is returned as text, truncated to ``max_chars`` characters.
    Failures are reported as strings, not exceptions.
    """

    def __init__(self, api_url: str, max_chars: int = 5000):
        self.api_url = api_url
        # Upper bound on how much file text is handed back to the model.
        self.max_chars = max_chars

    def _format_content(self, content: str) -> str:
        """Label *content*, cutting it to ``self.max_chars`` characters if longer."""
        if len(content) > self.max_chars:
            return f"File content (first {self.max_chars} chars):\n{content[:self.max_chars]}"
        return f"File content:\n{content}"

    def execute(self, task_id: str) -> str:
        """Download the task's file and return its (possibly truncated) text.

        A 404 is reported as "No file is associated with this task."; any
        other HTTP or network failure is returned as an ``Error:`` string.
        """
        file_url = f"{self.api_url}/files/{task_id}"
        try:
            response = requests.get(file_url, timeout=20)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            # e.response can be None when the error was built without one.
            if e.response is not None and e.response.status_code == 404:
                return "No file is associated with this task."
            return f"Error: Failed to download file due to an HTTP error: {e}"
        except requests.exceptions.RequestException as e:
            return f"Error: Failed to download file due to a network error: {e}"
        return self._format_content(response.text)
48
-
49
- # --- GAIA Agent Definition ---
50
class GAIAAgent:
    """Gemini-backed agent: one zero-shot attempt, then a ReAct tool loop.

    The agent first asks the model to answer directly. If the model replies
    UNSURE (or the call fails), it runs up to ``MAX_ITERATIONS`` rounds of
    Thought / Action / Observation using the ``WebSearch`` and
    ``FileDownloader`` tools.
    """

    def __init__(self, gemini_api_key: str, pplx_api_key: str, api_url: str):
        print("Initializing GAIAAgent...")
        genai.configure(api_key=gemini_api_key)

        # --- CORRECTING THE MODEL TO THE USER'S SPECIFICATION ---
        self.model_name = 'gemini-2.5-pro-preview-06-05'
        self.model = genai.GenerativeModel(self.model_name)
        print(f"Agent equipped with user-specified model: {self.model_name}")

        self.tools = {
            "WebSearch": WebSearchTool(api_key=pplx_api_key),
            "FileDownloader": FileDownloaderTool(api_url=api_url),
        }

        self.zero_shot_prompt_template = """
        Analyze the following question. If the answer is self-contained in the question and requires no external tools, provide the answer directly and concisely. Otherwise, respond with the single word: UNSURE.

        Question: {question}
        Answer:"""

        self.react_prompt_template = """
        You are a state-of-the-art reasoning agent. Your goal is to answer the user's question by creating a plan and executing it using the tools provided.

        **Tools Available:**
        - **WebSearch[query]**: Searches the web for information. Use different queries if initial results are not satisfactory.
        - **FileDownloader[task_id]**: Downloads a file for a specific task. The task_id is '{task_id}'.

        **Reasoning Format:**
        Thought: My reasoning process and plan to solve the question.
        Action: The tool I will use, in the format `ToolName[input]`.
        Observation: [The result from the tool will be inserted here]
        ... (The Thought/Action/Observation cycle can repeat)
        Thought: I have sufficient information to provide the final answer.
        Final Answer: The definitive answer to the user's question.

        **Guiding Principles:**
        1. **Persist:** Do not give up easily. If a search fails, re-evaluate and try a more specific or different query.
        2. **Conclude:** Once you have the answer, state it clearly with `Final Answer:`.
        3. **Fail Gracefully:** If, after several genuine attempts, you conclude the answer is unobtainable, state `Final Answer: I am unable to answer this question.`

        Question: {question}"""
        print("GAIAAgent initialized with refined prompts for the specified model.")

    def _call_gemini_api_with_backoff(self, prompt_text):
        """Call the model, retrying rate-limit errors with exponential backoff.

        Returns the response text, or a string starting with ``AGENT_ERROR``
        when the call fails or ``MAX_RETRIES`` rate-limit retries are used up.
        """
        retries = 0
        while retries < MAX_RETRIES:
            try:
                # Fixed pause before every request to stay under the rate limit.
                time.sleep(2)
                response = self.model.generate_content(prompt_text)
                return response.text
            except exceptions.ResourceExhausted:
                wait_time = (2 ** retries) + 2
                print(f"API Rate Limit Exceeded (429). Waiting for {wait_time}s to retry...")
                time.sleep(wait_time)
                retries += 1
            except Exception as e:
                print(f"An unexpected error occurred with Gemini API: {e}")
                return f"AGENT_ERROR: An unexpected error occurred: {e}"
        return "AGENT_ERROR: API rate limit exceeded after multiple retries."

    @staticmethod
    def _own_turn(response_text):
        """Return the model's reply up to (not including) its first ``Observation:`` line.

        Observations are supplied by the tools. Anything the model writes
        after its own ``Observation:`` marker was not produced by a tool, so
        it is discarded before the reply is parsed.
        """
        parts = re.split(r"^\s*Observation:", response_text, maxsplit=1, flags=re.MULTILINE)
        return parts[0].rstrip()

    @staticmethod
    def _parse_step(turn_text):
        """Classify one model turn.

        Returns ``("final", answer, None)``, ``("action", tool_name, tool_input)``
        or ``("none", None, None)`` when neither marker is present.
        """
        final_answer_match = re.search(r"Final Answer:\s*(.*)", turn_text, re.DOTALL)
        if final_answer_match:
            return ("final", final_answer_match.group(1).strip(), None)
        action_match = re.search(r"Action:\s*(\w+)\[(.*?)\]", turn_text, re.DOTALL)
        if action_match:
            return ("action", action_match.group(1).strip(), action_match.group(2).strip())
        return ("none", None, None)

    def __call__(self, question: str, task_id: str) -> str:
        """Answer *question*; returns the answer text or an ``AGENT_ERROR`` string."""
        print(f"\n{'='*20}\nProcessing Task ID: {task_id}\nQuestion: {question[:100]}...")
        print(f"--- Using model: {self.model_name} ---")

        # Step 1: Zero-Shot Attempt
        zero_shot_prompt = self.zero_shot_prompt_template.format(question=question)
        zero_shot_answer = self._call_gemini_api_with_backoff(zero_shot_prompt).strip()
        # An empty reply is not an answer; fall through to the tool loop.
        if (
            zero_shot_answer
            and "AGENT_ERROR" not in zero_shot_answer
            and "UNSURE" not in zero_shot_answer.upper()
        ):
            print(f"Zero-shot successful! Answer: {zero_shot_answer}")
            return zero_shot_answer

        # Step 2: ReAct Loop
        print("--- Zero-shot failed, starting ReAct loop ---")
        current_prompt_history = self.react_prompt_template.format(question=question, task_id=task_id)

        for i in range(MAX_ITERATIONS):
            print(f"\n--- ReAct Iteration {i+1} ---")
            response_text = self._call_gemini_api_with_backoff(current_prompt_history)
            print(f"LLM Response:\n{response_text}")

            if "AGENT_ERROR" in response_text:
                return response_text

            own_turn = self._own_turn(response_text)
            kind, first, second = self._parse_step(own_turn)

            if kind == "final":
                print(f"Found Final Answer: {first}")
                return first

            if kind == "none":
                # No recognised marker: hand back the raw reply.
                return response_text.strip()

            tool_name, tool_input = first, second
            if tool_name in self.tools:
                tool = self.tools[tool_name]
                try:
                    # FileDownloader always uses the real task id, not the model's text.
                    observation = tool.execute(task_id if tool_name == "FileDownloader" else tool_input)
                except Exception as e:
                    observation = f"Error executing tool: {e}"
                current_prompt_history += f"\n{own_turn}\nObservation: {observation}"
            else:
                current_prompt_history += f"\n{own_turn}\nObservation: Error - The tool '{tool_name}' does not exist."
        return "AGENT_ERROR: Agent reached max iterations."
151
 
152
- # --- Main run_and_submit_all function ---
153
  def run_and_submit_all(profile: gr.OAuthProfile | None):
154
  space_id = os.getenv("SPACE_ID")
155
  if not profile: return "Please Login to Hugging Face with the button.", None
156
  username = f"{profile.username}"
157
 
158
  pplx_key, gemini_key = os.getenv("PPLX_API_KEY"), os.getenv("GEMINI_API_KEY")
159
- if not pplx_key or not gemini_key: return "API keys not found in Space secrets.", None
160
 
161
  api_url = DEFAULT_API_URL
162
  try:
163
- agent = GAIAAgent(gemini_api_key=gemini_key, pplx_api_key=pplx_key, api_url=api_url)
 
164
  questions_data = requests.get(f"{api_url}/questions", timeout=15).json()
165
  except Exception as e: return f"Error during setup: {e}", None
166
 
@@ -173,10 +119,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
173
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
174
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
175
  except Exception as e:
176
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
177
 
178
- print(f"--- Waiting for 12 seconds before next question... ---")
179
- time.sleep(12)
 
180
 
181
  if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
182
 
@@ -195,10 +144,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
195
  except requests.exceptions.RequestException as e:
196
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
197
 
198
- # --- Gradio Interface (Unchanged) ---
199
  with gr.Blocks() as demo:
200
- gr.Markdown("# GAIA Agent Evaluation Runner")
201
- gr.Markdown("Equipped with the user-specified **gemini-2.5-pro-preview-06-05** model.")
202
  gr.LoginButton()
203
  run_button = gr.Button("Run Evaluation & Submit All Answers")
204
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
1
  import os
2
  import gradio as gr
3
  import requests
 
 
 
4
  import re
5
  import time
6
+ import pandas as pd
7
+ import google.generativeai as genai
8
+ from google.generativeai.types import HarmCategory, HarmBlockThreshold
9
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
12
 
13
+ # --- New Native Gemini Agent ---
14
class NativeGeminiAgent:
    """
    An agent that leverages Gemini's native multi-modal capabilities,
    including grounding, video, and file understanding.

    For each question it sends the question text, a file-data part for every
    URL found in the question, and (when the scoring server has one) the
    task's attached file as inline bytes.
    """

    _URL_PATTERN = re.compile(r'https?://\S+')
    _CITATION_PATTERN = re.compile(r'\[\d+\]')
    # Characters that often follow a URL in prose but are not part of it.
    _TRAILING_PUNCTUATION = ".,;:!?'\""
    _CLOSERS = {")": "(", "]": "[", "}": "{"}

    def __init__(self, gemini_api_key: str, api_url: str):
        print("Initializing NativeGeminiAgent...")
        genai.configure(api_key=gemini_api_key)

        self.api_url = api_url

        # Enable native grounding with Google Search.
        # No `disable_attribution` argument: the original passed False, and
        # that name comes from the Vertex AI SDK rather than genai.protos.
        google_search_retrieval = genai.protos.Tool(
            google_search_retrieval=genai.protos.GoogleSearchRetrieval()
        )

        # Configure the model with the native tool
        self.model_name = 'gemini-1.5-pro-latest'
        self.model = genai.GenerativeModel(
            model_name=self.model_name,
            tools=[google_search_retrieval],
            # A more direct prompt, trusting the model's native abilities
            system_instruction="You are a world-class problem solver. Your goal is to answer the user's question accurately. Use your tools and reasoning abilities to provide a definitive answer.",
            # Safety settings dialed down to allow answering controversial topics if they appear in GAIA
            safety_settings={
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
            }
        )
        print(f"Agent equipped with {self.model_name} and native Google Search grounding.")

    def _check_if_file_exists(self, url: str) -> bool:
        """Return True when a HEAD request to *url* answers with status 200."""
        try:
            response = requests.head(url, timeout=10)
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    @classmethod
    def _extract_urls(cls, question):
        """Return the distinct http(s) URLs in *question*, in order of appearance.

        Trailing sentence punctuation and unbalanced closing brackets are
        trimmed, so "(https://a.b/c)." yields "https://a.b/c".
        """
        found = []
        for raw in cls._URL_PATTERN.findall(question):
            url = raw
            while url:
                last = url[-1]
                if last in cls._TRAILING_PUNCTUATION:
                    url = url[:-1]
                elif last in cls._CLOSERS and url.count(last) > url.count(cls._CLOSERS[last]):
                    url = url[:-1]
                else:
                    break
            if url:
                found.append(url)
        # dict.fromkeys removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(found))

    @staticmethod
    def _guess_url_mime_type(url, default="video/mp4"):
        """Guess a MIME type from the URL path's extension, else return *default*."""
        import mimetypes
        from urllib.parse import urlsplit

        guessed, _ = mimetypes.guess_type(urlsplit(url).path)
        return guessed or default

    @staticmethod
    def _resolve_mime_type(content_type, content_disposition=None, default="image/jpeg"):
        """Pick a MIME type from HTTP response headers.

        Uses Content-Type unless it is missing or a generic octet-stream, then
        the filename in Content-Disposition, then *default*.
        """
        import mimetypes

        declared = (content_type or "").split(";", 1)[0].strip().lower()
        if declared and declared not in ("application/octet-stream", "binary/octet-stream"):
            return declared
        name_match = re.search(
            r'filename\*?=(?:[\w-]+\'\')?"?([^";]+)"?',
            content_disposition or "",
            re.IGNORECASE,
        )
        if name_match:
            guessed, _ = mimetypes.guess_type(name_match.group(1).strip())
            if guessed:
                return guessed
        return default

    @classmethod
    def _strip_citations(cls, text):
        """Remove bracketed numeric markers such as ``[3]`` and trim whitespace."""
        return cls._CITATION_PATTERN.sub('', text).strip()

    def _fetch_task_file(self, task_id):
        """Download the task's file and return it as an inline part dict, or None.

        The part has the ``{"mime_type": ..., "data": bytes}`` shape. The MIME
        type is taken from the response headers because the
        ``/files/{task_id}`` URL itself carries no file extension.
        """
        file_url = f"{self.api_url}/files/{task_id}"
        try:
            response = requests.get(file_url, timeout=30)
        except requests.exceptions.RequestException as e:
            print(f"Could not download associated file: {e}")
            return None
        if response.status_code != 200 or not response.content:
            return None
        mime_type = self._resolve_mime_type(
            response.headers.get("Content-Type"),
            response.headers.get("Content-Disposition"),
        )
        print(f"Found associated file ({mime_type}, {len(response.content)} bytes): {file_url}")
        return {"mime_type": mime_type, "data": response.content}

    def __call__(self, question: str, task_id: str) -> str:
        """Answer *question*; returns the answer text or an ``AGENT_ERROR`` string."""
        print(f"\n{'='*20}\nProcessing Task ID: {task_id}")

        try:
            # 1. Attach every URL mentioned in the question as a file-data part.
            url_parts = []
            for url in self._extract_urls(question):
                print(f"Found URL in question: {url}")
                url_parts.append(
                    genai.protos.Part(
                        file_data=genai.protos.FileData(
                            mime_type=self._guess_url_mime_type(url),
                            file_uri=url,
                        )
                    )
                )

            # 2. Attach the task's file from the GAIA server, if there is one.
            file_parts = []
            file_part = self._fetch_task_file(task_id)
            if file_part is not None:
                file_parts.append(file_part)
            else:
                print("No associated file found for this task.")

            prompt_parts = [question, *url_parts, *file_parts]
            print(f"Sending {len(prompt_parts)} parts to the model.")

            try:
                response = self.model.generate_content(prompt_parts)
            except Exception as e:
                if not url_parts:
                    raise
                # NOTE(review): presumably the API rejects URIs it cannot read;
                # the URLs are still present in the question text itself.
                print(f"Request with URL attachments failed ({e}); retrying without them.")
                response = self.model.generate_content([question, *file_parts])

            # The grounding feature may add citations. Remove them for the final answer.
            final_answer = self._strip_citations(response.text)
            print(f"Model generated answer: {final_answer}")
            return final_answer

        except Exception as e:
            print(f"An error occurred while calling the Gemini API: {e}")
            return f"AGENT_ERROR: Could not get a response from the model. Details: {e}"
 
 
 
96
 
97
+ # --- Main run_and_submit_all function (largely the same, but simpler) ---
98
  def run_and_submit_all(profile: gr.OAuthProfile | None):
99
  space_id = os.getenv("SPACE_ID")
100
  if not profile: return "Please Login to Hugging Face with the button.", None
101
  username = f"{profile.username}"
102
 
103
  pplx_key, gemini_key = os.getenv("PPLX_API_KEY"), os.getenv("GEMINI_API_KEY")
104
+ if not gemini_key: return "CRITICAL ERROR: GEMINI_API_KEY not found in Space secrets.", None
105
 
106
  api_url = DEFAULT_API_URL
107
  try:
108
+ # We no longer need the Perplexity key for the agent
109
+ agent = NativeGeminiAgent(gemini_api_key=gemini_key, api_url=api_url)
110
  questions_data = requests.get(f"{api_url}/questions", timeout=15).json()
111
  except Exception as e: return f"Error during setup: {e}", None
112
 
 
119
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
120
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
121
  except Exception as e:
122
+ error_message = f"AGENT CRASH: {e}"
123
+ print(error_message)
124
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_message})
125
 
126
+ # API calls are now fewer but more complex. A delay is still wise.
127
+ print(f"--- Waiting for 10 seconds before next question... ---")
128
+ time.sleep(10)
129
 
130
  if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
131
 
 
144
  except requests.exceptions.RequestException as e:
145
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
146
 
147
+ # --- Gradio Interface ---
148
  with gr.Blocks() as demo:
149
+ gr.Markdown("# Native Multi-Modal GAIA Agent")
150
+ gr.Markdown("This agent uses Gemini 1.5 Pro with native Google Search grounding and direct multi-modal understanding (video, images, files).")
151
  gr.LoginButton()
152
  run_button = gr.Button("Run Evaluation & Submit All Answers")
153
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)