File size: 9,178 Bytes
10e9b7d
 
eccf8e4
0b26a35
 
ee57d8e
 
 
10e9b7d
30ab757
3db6293
75a1136
0b26a35
75a1136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b26a35
30ab757
ee57d8e
75a1136
 
 
 
30ab757
75a1136
ee57d8e
 
75a1136
 
 
ee57d8e
 
75a1136
ee57d8e
 
75a1136
dceeb49
 
75a1136
dceeb49
 
 
 
75a1136
dceeb49
ee57d8e
 
 
dceeb49
ee57d8e
75a1136
ee57d8e
4e55bbe
0b26a35
ee57d8e
0b26a35
75a1136
 
 
 
 
 
ee57d8e
dceeb49
 
 
 
 
75a1136
 
ee57d8e
 
 
dceeb49
 
 
75a1136
 
 
 
 
 
 
 
 
 
dceeb49
75a1136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dceeb49
75a1136
 
dceeb49
75a1136
0b26a35
578f455
0b26a35
 
75a1136
e3c5ce5
3c4371f
75a1136
578f455
75a1136
 
0b26a35
7e4a06b
31243f4
75a1136
e3c5ce5
 
e80aab9
e3c5ce5
31243f4
e3c5ce5
 
31243f4
0b26a35
7d65c66
 
31243f4
75a1136
4e55bbe
75a1136
ee57d8e
31243f4
e3c5ce5
 
 
7d65c66
e3c5ce5
e80aab9
e3c5ce5
e80aab9
 
e3c5ce5
 
 
 
 
 
 
 
ee57d8e
e80aab9
75a1136
 
7e4a06b
31243f4
9088b99
7d65c66
e3c5ce5
e80aab9
 
3c4371f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import os
import gradio as gr
import requests
import re
import time
import pandas as pd
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MAX_ITERATIONS = 7

# --- HYBRID: Re-introducing the WebSearchTool ---
class WebSearchTool:
    """A tool to search the web using the Perplexity API.

    Failures are reported as strings starting with ``"Error:"`` instead of
    being raised, so the calling agent can feed them back to the model as an
    observation.
    """

    def __init__(self, api_key):
        """Store the bearer token and the chat-completions endpoint URL."""
        self.api_key = api_key
        self.url = "https://api.perplexity.ai/chat/completions"
        print("WebSearchTool initialized.")

    @staticmethod
    def _extract_answer(data) -> str:
        """Return the first choice's message content from a decoded reply.

        Returns an ``"Error: ..."`` string when *data* does not have the
        ``choices[0].message.content`` shape (missing key, empty list, or a
        non-container value).
        """
        try:
            return data['choices'][0]['message']['content']
        except (KeyError, IndexError, TypeError) as e:
            return f"Error: Web search returned an unexpected response. {e!r}"

    def execute(self, query: str) -> str:
        """POST *query* to the search endpoint and return the answer text.

        Args:
            query: The search question, sent as the user message.

        Returns:
            The answer text, or an ``"Error: ..."`` string when the request
            fails, the body is not valid JSON, or the JSON has an unexpected
            shape.
        """
        print(f"Executing WebSearchTool with query: {query}")
        payload = {
            "model": "llama-3-sonar-small-32k-online",
            "messages": [
                {"role": "system", "content": "You are a research assistant. Provide a precise and factual answer to the query."},
                {"role": "user", "content": query},
            ],
        }
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }
        try:
            response = requests.post(self.url, json=payload, headers=headers, timeout=40)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decode failures on Requests versions where
            # the decode error is not a RequestException subclass.
            return f"Error: Web search failed. {e}"
        return self._extract_answer(data)

# --- The New Hybrid Agent ---
class HybridAgent:
    """ReAct-style agent pairing a Gemini model with an external web-search tool.

    For each question the agent builds a list of prompt parts (instructions,
    the question, and any URL/file parts it manages to attach), asks the model
    for a reply and parses it. A ``Final Answer:`` ends the run; an
    ``Action: WebSearch[...]`` directive triggers a search whose result is
    appended as an observation before the next round. At most
    ``MAX_ITERATIONS`` rounds are performed.
    """

    # Patterns used to parse the model's ReAct-formatted replies and to pull
    # URLs out of the question text.
    _FINAL_ANSWER_RE = re.compile(r"Final Answer:\s*(.*)", re.DOTALL)
    _ACTION_RE = re.compile(r"Action:\s*WebSearch\[(.*?)\]", re.DOTALL)
    _URL_RE = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')

    # (suffixes, MIME type) pairs checked in order by ``_get_mime_type``.
    # Add other types as needed.
    _MIME_BY_SUFFIX = (
        (('.jpg', '.jpeg'), "image/jpeg"),
        (('.png',), "image/png"),
        (('.pdf',), "application/pdf"),
    )

    def __init__(self, gemini_api_key: str, pplx_api_key: str, api_url: str):
        """Configure the Gemini client and create the web-search tool.

        Args:
            gemini_api_key: API key passed to ``genai.configure``.
            pplx_api_key: API key handed to ``WebSearchTool``.
            api_url: Base URL of the scoring API; task files are looked up
                under ``{api_url}/files/{task_id}``.
        """
        print("Initializing HybridAgent...")
        genai.configure(api_key=gemini_api_key)

        self.api_url = api_url
        self.web_search_tool = WebSearchTool(pplx_api_key)

        # Using the stable, powerful model that we know works.
        self.model_name = 'gemini-2.5-flash-preview-05-20'

        # HYBRID: We do NOT enable the native search tool, as it's not supported by all models.
        self.model = genai.GenerativeModel(
            model_name=self.model_name,
            system_instruction=(
                "You are a powerful reasoning agent. You can understand files and URLs provided to you directly. \n"
                "For general web searches or to find new information, you MUST use the `WebSearch` tool.\n"
                "Follow the ReAct format: Thought, Action, Observation, Final Answer."
            ),
            safety_settings={
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                # Add other categories as needed
            }
        )
        print(f"Agent initialized with {self.model_name} and an external WebSearchTool.")

    def _get_mime_type(self, url: str) -> str:
        """Guess a MIME type from the file extension in *url*.

        The comparison is case-insensitive. The URL is first checked with its
        query string and fragment removed (so ``a.png?size=large`` is seen as
        a PNG), then as given. Returns ``"application/octet-stream"`` when no
        known extension matches.
        """
        url_lower = url.lower()
        path_only = url_lower.split('#', 1)[0].split('?', 1)[0]
        for candidate in (path_only, url_lower):
            for suffixes, mime_type in self._MIME_BY_SUFFIX:
                if candidate.endswith(suffixes):
                    return mime_type
        return "application/octet-stream"

    def _check_if_file_exists(self, url: str) -> bool:
        """Return True when *url* answers with HTTP 200.

        A HEAD request is tried first. If the server replies 405 or 501
        (HEAD not implemented for the route), a streamed GET is used instead
        so the status can be read without downloading the body. Any request
        error is treated as "does not exist".
        """
        try:
            response = requests.head(url, timeout=15, allow_redirects=True)
            if response.status_code in (405, 501):
                with requests.get(url, timeout=15, allow_redirects=True, stream=True) as get_response:
                    return get_response.status_code == 200
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    @staticmethod
    def _extract_urls(question: str) -> list[str]:
        """Return the http(s) URLs found in *question*, in order.

        Trailing sentence punctuation (``. , ; : ! ?``) is stripped because the
        URL pattern would otherwise keep it as part of the link.
        """
        found = (raw.rstrip('.,;:!?') for raw in HybridAgent._URL_RE.findall(question))
        return [url for url in found if url]

    @staticmethod
    def _parse_response(response_text: str) -> tuple[str, str]:
        """Classify one model reply.

        Returns:
            ``("search", query)`` when a ``WebSearch`` action comes before any
            ``Final Answer:`` (so an answer invented after an unexecuted
            action is not trusted), ``("final", answer)`` when a final answer
            is given, otherwise ``("direct", text)`` with the stripped reply.
        """
        final_match = HybridAgent._FINAL_ANSWER_RE.search(response_text)
        action_match = HybridAgent._ACTION_RE.search(response_text)
        if action_match and (final_match is None or action_match.start() < final_match.start()):
            return "search", action_match.group(1).strip()
        if final_match:
            return "final", final_match.group(1).strip()
        return "direct", response_text.strip()

    @staticmethod
    def _model_turn(response_text: str) -> str:
        """Return the part of a reply to record in the prompt history.

        The reply is cut right after the first ``WebSearch`` action so text
        the model wrote past it (e.g. an invented observation) is dropped, and
        the leading ``Thought:`` marker is removed when present. Replies
        without the marker are returned as-is instead of raising.
        """
        action_match = HybridAgent._ACTION_RE.search(response_text)
        turn = response_text[:action_match.end()] if action_match else response_text
        _, marker, after_marker = turn.partition('Thought:')
        return after_marker if marker else turn

    def _attach_uri(self, prompt_parts: list, uri: str, label: str) -> None:
        """Best-effort: append a URI-backed part for *uri* to *prompt_parts*.

        Failures are logged and ignored so that a bad attachment never aborts
        the task.
        """
        try:
            mime_type = self._get_mime_type(uri)
            # NOTE(review): `Part.from_uri` looks like a Vertex AI SDK API; if
            # `google.generativeai` does not expose `genai.Part`, this raises
            # AttributeError and the attachment is silently skipped below.
            # Confirm against the installed SDK.
            prompt_parts.append(genai.Part.from_uri(uri=uri, mime_type=mime_type))
            print(f"Appended {label} to prompt parts: {uri}")
        except Exception as e:
            print(f"Failed to add {label} {uri}: {e}")

    def __call__(self, question: str, task_id: str) -> str:
        """Answer *question* for the given task.

        Args:
            question: The question text; URLs inside it are attached as parts.
            task_id: Task identifier, used to look up an associated file at
                ``{api_url}/files/{task_id}``.

        Returns:
            The final answer, the raw reply when the model answers without
            the ReAct tags, or a string starting with ``"AGENT_ERROR:"`` when
            generation fails or the iteration limit is reached.
        """
        print(f"\n{'='*20}\nProcessing Task ID: {task_id}")

        # --- HYBRID: Multi-modal part preparation ---
        prompt_parts = [
            "You will solve the following question. You have been provided with the question and any relevant files or URLs.",
            "Remember, for web searches, you must use the `WebSearch` tool in the ReAct format (Thought, Action, Observation).",
            f"\n--- QUESTION ---\n{question}"
        ]

        for url in self._extract_urls(question):
            self._attach_uri(prompt_parts, url, "URL")

        file_url = f"{self.api_url}/files/{task_id}"
        if self._check_if_file_exists(file_url):
            self._attach_uri(prompt_parts, file_url, "file")

        # --- HYBRID: ReAct Loop ---
        for i in range(MAX_ITERATIONS):
            print(f"\n--- Hybrid Iteration {i+1} ---")
            try:
                response = self.model.generate_content(
                    prompt_parts,
                    generation_config=genai.types.GenerationConfig(temperature=0.1)
                )
                response_text = response.text
            except Exception as e:
                # Any SDK failure is reported as the task's answer rather than raised.
                return f"AGENT_ERROR: {e}"

            print(f"LLM Response:\n{response_text}")

            kind, payload = self._parse_response(response_text)
            if kind != "search":
                # Either a tagged final answer or a direct, untagged answer.
                return payload

            observation = self.web_search_tool.execute(payload)
            prompt_parts.append(f"\nThought: {self._model_turn(response_text)}")
            prompt_parts.append(f"Observation: {observation}")

        return "AGENT_ERROR: Max iterations reached."

# --- Main run_and_submit_all function ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question from the scoring API and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per attempted question, or ``None``
        when the run stops before any question is attempted (no login,
        missing API keys, or a setup failure).
    """
    # SPACE_ID is only used to build the link to this agent's code; when the
    # variable is unset the link will contain "None".
    space_id = os.getenv("SPACE_ID")
    if not profile:
        return "Please Login to Hugging Face.", None
    username = f"{profile.username}"

    # HYBRID: We need both keys again.
    gemini_key = os.getenv("GEMINI_API_KEY")
    pplx_key = os.getenv("PPLX_API_KEY")
    if not gemini_key or not pplx_key:
        return "CRITICAL ERROR: GEMINI_API_KEY or PPLX_API_KEY not found.", None

    api_url = DEFAULT_API_URL
    try:
        agent = HybridAgent(gemini_api_key=gemini_key, pplx_api_key=pplx_key, api_url=api_url)
        questions_response = requests.get(f"{api_url}/questions", timeout=15)
        # Surface HTTP errors here instead of trying to iterate an error body.
        questions_response.raise_for_status()
        questions_data = questions_response.json()
    except Exception as e:
        return f"Error during setup: {e}", None

    if not isinstance(questions_data, list):
        return (
            f"Error during setup: expected a list of questions, got {type(questions_data).__name__}.",
            None,
        )

    results_log, answers_payload = [], []
    for item in questions_data:
        # Malformed entries are skipped rather than crashing the whole run.
        if not isinstance(item, dict):
            continue
        task_id, question_text = item.get("task_id"), item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A crashed task is logged for display but not submitted.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT CRASH: {e}"})

        # Fixed pause between questions.
        print("--- Waiting for 10 seconds... ---")
        time.sleep(10)

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}

    try:
        response = requests.post(f"{api_url}/submit", json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
                        f"Overall Score: {result_data.get('score', 'N/A')}% "
                        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
                        f"Message: {result_data.get('message', 'No message received.')}")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.RequestException as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)

# --- Gradio Interface ---
# Build the UI: a login button, a run button, and two outputs (status text and
# a table of per-question answers) filled by `run_and_submit_all`.
with gr.Blocks() as demo:
    gr.Markdown("# Hybrid GAIA Agent")
    # The description does not name a specific model version so it cannot drift
    # from the model the agent is actually configured with.
    gr.Markdown("This agent uses Gemini's native multi-modality (files, URLs) combined with an external Perplexity web search tool.")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No explicit inputs are wired to the click handler.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)