jonathan9879 committed on
Commit
75a1136
·
verified ·
1 Parent(s): 39af7e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -66
app.py CHANGED
@@ -9,119 +9,136 @@ from google.generativeai.types import HarmCategory, HarmBlockThreshold
9
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
12
 
13
- # --- User's Corrected NativeGeminiAgent Class ---
14
- # This is the superior implementation provided by you.
15
- class NativeGeminiAgent:
16
- def __init__(self, gemini_api_key: str, api_url: str):
17
- print("Initializing NativeGeminiAgent with corrected configuration...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  genai.configure(api_key=gemini_api_key)
19
 
20
  self.api_url = api_url
21
- self.model_name = 'gemini-2.5-flash-preview-05-20' # Using the stable, powerful model
 
 
 
22
 
23
- # Correct tool configuration using the recommended string-based method
24
  self.model = genai.GenerativeModel(
25
  model_name=self.model_name,
26
- tools=['google_search_retrieval'],
27
- system_instruction="""You are a world-class problem solver and researcher.
28
- Analyze the question carefully, use available tools to gather information,
29
- and provide accurate, concise answers. Focus on factual information and
30
- avoid speculation.""",
31
  safety_settings={
32
  HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
33
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
34
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
35
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
36
  }
37
  )
38
- print(f"Agent initialized with {self.model_name} and Google Search grounding.")
39
 
40
  def _get_mime_type(self, url: str) -> str:
41
- """Enhanced MIME type detection."""
42
  url_lower = url.lower()
43
  if url_lower.endswith(('.jpg', '.jpeg')): return "image/jpeg"
44
  elif url_lower.endswith('.png'): return "image/png"
45
- elif url_lower.endswith('.gif'): return "image/gif"
46
  elif url_lower.endswith('.pdf'): return "application/pdf"
47
- elif url_lower.endswith('.txt'): return "text/plain"
48
- elif url_lower.endswith('.csv'): return "text/csv"
49
- elif url_lower.endswith(('.mp4', '.avi', '.mov')): return "video/mp4"
50
- elif url_lower.endswith('.json'): return "application/json"
51
  else: return "application/octet-stream"
52
 
53
  def _check_if_file_exists(self, url: str) -> bool:
54
- """Enhanced file existence check."""
55
  try:
56
  response = requests.head(url, timeout=15, allow_redirects=True)
57
  return response.status_code == 200
58
- except requests.exceptions.RequestException as e:
59
- print(f"File check failed for {url}: {e}")
60
  return False
61
 
62
  def __call__(self, question: str, task_id: str) -> str:
63
  print(f"\n{'='*20}\nProcessing Task ID: {task_id}")
64
 
65
- prompt_parts = [question]
 
 
 
 
 
66
 
67
- # Enhanced URL detection
68
  urls_in_question = re.findall(r'https?://[^\s<>"{}|\\^`\[\]]+', question)
69
  for url in urls_in_question:
70
  try:
71
  mime_type = self._get_mime_type(url)
72
  prompt_parts.append(genai.Part.from_uri(uri=url, mime_type=mime_type))
73
- print(f"Added URL: {url} (MIME: {mime_type})")
74
- except Exception as e:
75
- print(f"Failed to add URL {url}: {e}")
76
 
77
- # Check for associated files
78
  file_url = f"{self.api_url}/files/{task_id}"
79
  if self._check_if_file_exists(file_url):
80
  try:
81
  mime_type = self._get_mime_type(file_url)
82
  prompt_parts.append(genai.Part.from_uri(uri=file_url, mime_type=mime_type))
83
- print(f"Added file: {file_url} (MIME: {mime_type})")
84
- except Exception as e:
85
- print(f"Failed to add file {file_url}: {e}")
86
-
87
- try:
88
- # Use the specified generation config for more stable outputs
89
- response = self.model.generate_content(
90
- prompt_parts,
91
- request_options={'timeout': 120},
92
- generation_config=genai.types.GenerationConfig(
93
- temperature=0.1,
94
- top_p=0.8,
95
- max_output_tokens=2048
96
  )
97
- )
98
-
99
- if response.text:
100
- # Thoroughly clean the response text
101
- final_answer = response.text.strip()
102
- final_answer = re.sub(r'\[\d+\]', '', final_answer) # Remove citations
103
- final_answer = re.sub(r'\s+', ' ', final_answer).strip() # Normalize whitespace
104
- return final_answer
 
 
 
 
 
 
 
105
  else:
106
- return "AGENT_ERROR: Empty response from model"
 
107
 
108
- except Exception as e:
109
- error_msg = f"AGENT_ERROR: {str(e)}"
110
- print(error_msg)
111
- return error_msg
112
 
113
  # --- Main run_and_submit_all function ---
114
  def run_and_submit_all(profile: gr.OAuthProfile | None):
115
  space_id = os.getenv("SPACE_ID")
116
- if not profile: return "Please Login to Hugging Face with the button.", None
117
  username = f"{profile.username}"
118
 
 
119
  gemini_key = os.getenv("GEMINI_API_KEY")
120
- if not gemini_key: return "CRITICAL ERROR: GEMINI_API_KEY not found in Space secrets.", None
 
121
 
122
  api_url = DEFAULT_API_URL
123
  try:
124
- agent = NativeGeminiAgent(gemini_api_key=gemini_key, api_url=api_url)
125
  questions_data = requests.get(f"{api_url}/questions", timeout=15).json()
126
  except Exception as e: return f"Error during setup: {e}", None
127
 
@@ -134,11 +151,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
134
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
135
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
136
  except Exception as e:
137
- error_message = f"AGENT CRASH: {e}"
138
- print(error_message)
139
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_message})
140
 
141
- print(f"--- Waiting for 10 seconds before next question... ---")
142
  time.sleep(10)
143
 
144
  if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
@@ -160,8 +175,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
160
 
161
  # --- Gradio Interface ---
162
  with gr.Blocks() as demo:
163
- gr.Markdown("# Native Multi-Modal GAIA Agent (Corrected)")
164
- gr.Markdown("This agent uses the improved architecture with proper tool configuration, MIME type detection, and error handling.")
165
  gr.LoginButton()
166
  run_button = gr.Button("Run Evaluation & Submit All Answers")
167
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
9
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
+ MAX_ITERATIONS = 7
13
 
14
+ # --- HYBRID: Re-introducing the WebSearchTool ---
15
+ class WebSearchTool:
16
+ """A tool to search the web using the Perplexity API."""
17
+ def __init__(self, api_key):
18
+ self.api_key = api_key
19
+ self.url = "https://api.perplexity.ai/chat/completions"
20
+ print("WebSearchTool initialized.")
21
+ def execute(self, query: str) -> str:
22
+ print(f"Executing WebSearchTool with query: {query}")
23
+ payload = {"model": "llama-3-sonar-small-32k-online", "messages": [{"role": "system", "content": "You are a research assistant. Provide a precise and factual answer to the query."}, {"role": "user", "content": query}]}
24
+ headers = {"accept": "application/json", "content-type": "application/json", "Authorization": f"Bearer {self.api_key}"}
25
+ try:
26
+ response = requests.post(self.url, json=payload, headers=headers, timeout=40)
27
+ response.raise_for_status()
28
+ return response.json()['choices'][0]['message']['content']
29
+ except requests.exceptions.RequestException as e:
30
+ return f"Error: Web search failed. {e}"
31
+
32
+ # --- The New Hybrid Agent ---
33
+ class HybridAgent:
34
+ def __init__(self, gemini_api_key: str, pplx_api_key: str, api_url: str):
35
+ print("Initializing HybridAgent...")
36
  genai.configure(api_key=gemini_api_key)
37
 
38
  self.api_url = api_url
39
+ self.web_search_tool = WebSearchTool(pplx_api_key)
40
+
41
+ # Using the stable, powerful model that we know works.
42
+ self.model_name = 'gemini-2.5-flash-preview-05-20'
43
 
44
+ # HYBRID: We do NOT enable the native search tool, as it's not supported by all models.
45
  self.model = genai.GenerativeModel(
46
  model_name=self.model_name,
47
+ system_instruction="""You are a powerful reasoning agent. You can understand files and URLs provided to you directly.
48
+ For general web searches or to find new information, you MUST use the `WebSearch` tool.
49
+ Follow the ReAct format: Thought, Action, Observation, Final Answer.""",
 
 
50
  safety_settings={
51
  HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
52
+ # Add other categories as needed
 
 
53
  }
54
  )
55
+ print(f"Agent initialized with {self.model_name} and an external WebSearchTool.")
56
 
57
  def _get_mime_type(self, url: str) -> str:
58
+ # (Using the robust MIME type detection from your last recommendation)
59
  url_lower = url.lower()
60
  if url_lower.endswith(('.jpg', '.jpeg')): return "image/jpeg"
61
  elif url_lower.endswith('.png'): return "image/png"
 
62
  elif url_lower.endswith('.pdf'): return "application/pdf"
63
+ # Add other types as needed...
 
 
 
64
  else: return "application/octet-stream"
65
 
66
  def _check_if_file_exists(self, url: str) -> bool:
 
67
  try:
68
  response = requests.head(url, timeout=15, allow_redirects=True)
69
  return response.status_code == 200
70
+ except requests.exceptions.RequestException:
 
71
  return False
72
 
73
  def __call__(self, question: str, task_id: str) -> str:
74
  print(f"\n{'='*20}\nProcessing Task ID: {task_id}")
75
 
76
+ # --- HYBRID: Multi-modal part preparation ---
77
+ prompt_parts = [
78
+ "You will solve the following question. You have been provided with the question and any relevant files or URLs.",
79
+ "Remember, for web searches, you must use the `WebSearch` tool in the ReAct format (Thought, Action, Observation).",
80
+ f"\n--- QUESTION ---\n{question}"
81
+ ]
82
 
 
83
  urls_in_question = re.findall(r'https?://[^\s<>"{}|\\^`\[\]]+', question)
84
  for url in urls_in_question:
85
  try:
86
  mime_type = self._get_mime_type(url)
87
  prompt_parts.append(genai.Part.from_uri(uri=url, mime_type=mime_type))
88
+ print(f"Appended URL to prompt parts: {url}")
89
+ except Exception as e: print(f"Failed to add URL {url}: {e}")
 
90
 
 
91
  file_url = f"{self.api_url}/files/{task_id}"
92
  if self._check_if_file_exists(file_url):
93
  try:
94
  mime_type = self._get_mime_type(file_url)
95
  prompt_parts.append(genai.Part.from_uri(uri=file_url, mime_type=mime_type))
96
+ print(f"Appended file to prompt parts: {file_url}")
97
+ except Exception as e: print(f"Failed to add file {file_url}: {e}")
98
+
99
+ # --- HYBRID: ReAct Loop ---
100
+ for i in range(MAX_ITERATIONS):
101
+ print(f"\n--- Hybrid Iteration {i+1} ---")
102
+ try:
103
+ response = self.model.generate_content(
104
+ prompt_parts,
105
+ generation_config=genai.types.GenerationConfig(temperature=0.1)
 
 
 
106
  )
107
+ response_text = response.text
108
+ except Exception as e: return f"AGENT_ERROR: {e}"
109
+
110
+ print(f"LLM Response:\n{response_text}")
111
+
112
+ final_answer_match = re.search(r"Final Answer:\s*(.*)", response_text, re.DOTALL)
113
+ if final_answer_match:
114
+ return final_answer_match.group(1).strip()
115
+
116
+ action_match = re.search(r"Action:\s*WebSearch\[(.*?)\]", response_text, re.DOTALL)
117
+ if action_match:
118
+ query = action_match.group(1).strip()
119
+ observation = self.web_search_tool.execute(query)
120
+ prompt_parts.append(f"\nThought: {response_text.split('Thought:')[1]}")
121
+ prompt_parts.append(f"Observation: {observation}")
122
  else:
123
+ # If the model gives a direct answer without the "Final Answer:" tag
124
+ return response_text.strip()
125
 
126
+ return "AGENT_ERROR: Max iterations reached."
 
 
 
127
 
128
  # --- Main run_and_submit_all function ---
129
  def run_and_submit_all(profile: gr.OAuthProfile | None):
130
  space_id = os.getenv("SPACE_ID")
131
+ if not profile: return "Please Login to Hugging Face.", None
132
  username = f"{profile.username}"
133
 
134
+ # HYBRID: We need both keys again.
135
  gemini_key = os.getenv("GEMINI_API_KEY")
136
+ pplx_key = os.getenv("PPLX_API_KEY")
137
+ if not gemini_key or not pplx_key: return "CRITICAL ERROR: GEMINI_API_KEY or PPLX_API_KEY not found.", None
138
 
139
  api_url = DEFAULT_API_URL
140
  try:
141
+ agent = HybridAgent(gemini_api_key=gemini_key, pplx_api_key=pplx_key, api_url=api_url)
142
  questions_data = requests.get(f"{api_url}/questions", timeout=15).json()
143
  except Exception as e: return f"Error during setup: {e}", None
144
 
 
151
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
152
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
153
  except Exception as e:
154
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT CRASH: {e}"})
 
 
155
 
156
+ print(f"--- Waiting for 10 seconds... ---")
157
  time.sleep(10)
158
 
159
  if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
175
 
176
  # --- Gradio Interface ---
177
  with gr.Blocks() as demo:
178
+ gr.Markdown("# Hybrid GAIA Agent")
179
+ gr.Markdown("This agent uses Gemini 1.5 Pro's native multi-modality (files, URLs) combined with an external Perplexity web search tool.")
180
  gr.LoginButton()
181
  run_button = gr.Button("Run Evaluation & Submit All Answers")
182
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)