jonathan9879 committed on
Commit
dceeb49
·
verified ·
1 Parent(s): 0d89b31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -48
app.py CHANGED
@@ -10,30 +10,24 @@ from google.generativeai.types import HarmCategory, HarmBlockThreshold
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
- # --- New Native Gemini Agent ---
 
14
  class NativeGeminiAgent:
15
- """
16
- An agent that leverages Gemini's native multi-modal capabilities,
17
- including grounding, video, and file understanding.
18
- """
19
  def __init__(self, gemini_api_key: str, api_url: str):
20
- print("Initializing NativeGeminiAgent...")
21
  genai.configure(api_key=gemini_api_key)
22
 
23
  self.api_url = api_url
 
24
 
25
- # --- CORRECTED INITIALIZATION ---
26
- # Enable native grounding with Google Search. No parameters are needed.
27
- google_search_retrieval = genai.protos.Tool(
28
- google_search_retrieval=genai.protos.GoogleSearchRetrieval()
29
- )
30
-
31
- # Configure the model with the native tool
32
- self.model_name = 'gemini-2.5-pro-preview-06-05' # Using the best stable model
33
  self.model = genai.GenerativeModel(
34
  model_name=self.model_name,
35
- tools=[google_search_retrieval],
36
- system_instruction="You are a world-class problem solver. Your goal is to answer the user's question accurately. Use your tools and reasoning abilities to provide a definitive answer.",
 
 
 
37
  safety_settings={
38
  HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
39
  HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
@@ -41,14 +35,28 @@ class NativeGeminiAgent:
41
  HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
42
  }
43
  )
44
- print(f"Agent equipped with {self.model_name} and native Google Search grounding.")
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def _check_if_file_exists(self, url: str) -> bool:
47
- """Checks if a remote file exists before sending it to Gemini."""
48
  try:
49
- response = requests.head(url, timeout=10)
50
  return response.status_code == 200
51
- except requests.exceptions.RequestException:
 
52
  return False
53
 
54
  def __call__(self, question: str, task_id: str) -> str:
@@ -56,39 +64,51 @@ class NativeGeminiAgent:
56
 
57
  prompt_parts = [question]
58
 
59
- # 1. Automatically find and add any URLs from the question text
60
- urls_in_question = re.findall(r'https?://\S+', question)
61
- if urls_in_question:
62
- for url in urls_in_question:
63
- print(f"Found URL in question: {url}")
64
- # Use a general MIME type that Gemini can often auto-detect for sources like YouTube
65
- prompt_parts.append(genai.Part.from_uri(uri=url, mime_type="video/mp4"))
 
 
66
 
67
- # 2. Check for and add any associated files from the GAIA server
68
  file_url = f"{self.api_url}/files/{task_id}"
69
  if self._check_if_file_exists(file_url):
70
- print(f"Found associated file, adding URL: {file_url}")
71
- # Simple MIME type guessing for common GAIA file types
72
- mime_type = "image/jpeg" # Default
73
- if file_url.endswith('.pdf'): mime_type = "application/pdf"
74
- if file_url.endswith('.txt'): mime_type = "text/plain"
75
- prompt_parts.append(genai.Part.from_uri(uri=file_url, mime_type=mime_type))
76
- else:
77
- print("No associated file found for this task.")
78
-
79
- print(f"Sending {len(prompt_parts)} parts to the model.")
80
 
81
  try:
82
- response = self.model.generate_content(prompt_parts, request_options={'timeout': 120})
83
-
84
- # The grounding feature may add citations. We remove them for the final answer.
85
- final_answer = re.sub(r'\[\d+\]', '', response.text).strip()
86
- print(f"Model generated answer: {final_answer}")
87
- return final_answer
 
 
 
 
88
 
 
 
 
 
 
 
 
 
 
89
  except Exception as e:
90
- print(f"An error occurred while calling the Gemini API: {e}")
91
- return f"AGENT_ERROR: Could not get a response from the model. Details: {e}"
 
92
 
93
  # --- Main run_and_submit_all function ---
94
  def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -140,8 +160,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
140
 
141
  # --- Gradio Interface ---
142
  with gr.Blocks() as demo:
143
- gr.Markdown("# Native Multi-Modal GAIA Agent")
144
- gr.Markdown("This agent uses Gemini 2.5 Pro with native Google Search grounding and direct multi-modal understanding (video, images, files).")
145
  gr.LoginButton()
146
  run_button = gr.Button("Run Evaluation & Submit All Answers")
147
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
+ # --- User's Corrected NativeGeminiAgent Class ---
14
+ # This is the superior implementation provided by you.
15
  class NativeGeminiAgent:
 
 
 
 
16
  def __init__(self, gemini_api_key: str, api_url: str):
17
+ print("Initializing NativeGeminiAgent with corrected configuration...")
18
  genai.configure(api_key=gemini_api_key)
19
 
20
  self.api_url = api_url
21
+ self.model_name = 'gemini-2.5-pro-preview-06-05' # Using the stable, powerful model
22
 
23
+ # Correct tool configuration using the recommended string-based method
 
 
 
 
 
 
 
24
  self.model = genai.GenerativeModel(
25
  model_name=self.model_name,
26
+ tools=['google_search_retrieval'],
27
+ system_instruction="""You are a world-class problem solver and researcher.
28
+ Analyze the question carefully, use available tools to gather information,
29
+ and provide accurate, concise answers. Focus on factual information and
30
+ avoid speculation.""",
31
  safety_settings={
32
  HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
33
  HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
 
35
  HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
36
  }
37
  )
38
+ print(f"Agent initialized with {self.model_name} and Google Search grounding.")
39
+
40
+ def _get_mime_type(self, url: str) -> str:
41
+ """Enhanced MIME type detection."""
42
+ url_lower = url.lower()
43
+ if url_lower.endswith(('.jpg', '.jpeg')): return "image/jpeg"
44
+ elif url_lower.endswith('.png'): return "image/png"
45
+ elif url_lower.endswith('.gif'): return "image/gif"
46
+ elif url_lower.endswith('.pdf'): return "application/pdf"
47
+ elif url_lower.endswith('.txt'): return "text/plain"
48
+ elif url_lower.endswith('.csv'): return "text/csv"
49
+ elif url_lower.endswith(('.mp4', '.avi', '.mov')): return "video/mp4"
50
+ elif url_lower.endswith('.json'): return "application/json"
51
+ else: return "application/octet-stream"
52
 
53
  def _check_if_file_exists(self, url: str) -> bool:
54
+ """Enhanced file existence check."""
55
  try:
56
+ response = requests.head(url, timeout=15, allow_redirects=True)
57
  return response.status_code == 200
58
+ except requests.exceptions.RequestException as e:
59
+ print(f"File check failed for {url}: {e}")
60
  return False
61
 
62
  def __call__(self, question: str, task_id: str) -> str:
 
64
 
65
  prompt_parts = [question]
66
 
67
+ # Enhanced URL detection
68
+ urls_in_question = re.findall(r'https?://[^\s<>"{}|\\^`\[\]]+', question)
69
+ for url in urls_in_question:
70
+ try:
71
+ mime_type = self._get_mime_type(url)
72
+ prompt_parts.append(genai.Part.from_uri(uri=url, mime_type=mime_type))
73
+ print(f"Added URL: {url} (MIME: {mime_type})")
74
+ except Exception as e:
75
+ print(f"Failed to add URL {url}: {e}")
76
 
77
+ # Check for associated files
78
  file_url = f"{self.api_url}/files/{task_id}"
79
  if self._check_if_file_exists(file_url):
80
+ try:
81
+ mime_type = self._get_mime_type(file_url)
82
+ prompt_parts.append(genai.Part.from_uri(uri=file_url, mime_type=mime_type))
83
+ print(f"Added file: {file_url} (MIME: {mime_type})")
84
+ except Exception as e:
85
+ print(f"Failed to add file {file_url}: {e}")
 
 
 
 
86
 
87
  try:
88
+ # Use the specified generation config for more stable outputs
89
+ response = self.model.generate_content(
90
+ prompt_parts,
91
+ request_options={'timeout': 120},
92
+ generation_config=genai.types.GenerationConfig(
93
+ temperature=0.1,
94
+ top_p=0.8,
95
+ max_output_tokens=2048
96
+ )
97
+ )
98
 
99
+ if response.text:
100
+ # Thoroughly clean the response text
101
+ final_answer = response.text.strip()
102
+ final_answer = re.sub(r'\[\d+\]', '', final_answer) # Remove citations
103
+ final_answer = re.sub(r'\s+', ' ', final_answer).strip() # Normalize whitespace
104
+ return final_answer
105
+ else:
106
+ return "AGENT_ERROR: Empty response from model"
107
+
108
  except Exception as e:
109
+ error_msg = f"AGENT_ERROR: {str(e)}"
110
+ print(error_msg)
111
+ return error_msg
112
 
113
  # --- Main run_and_submit_all function ---
114
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
160
 
161
  # --- Gradio Interface ---
162
  with gr.Blocks() as demo:
163
+ gr.Markdown("# Native Multi-Modal GAIA Agent (Corrected)")
164
+ gr.Markdown("This agent uses the improved architecture with proper tool configuration, MIME type detection, and error handling.")
165
  gr.LoginButton()
166
  run_button = gr.Button("Run Evaluation & Submit All Answers")
167
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)