File size: 8,282 Bytes
602a16c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b712b2b
602a16c
 
b712b2b
602a16c
 
 
 
 
 
 
 
b712b2b
602a16c
b712b2b
602a16c
 
 
 
b712b2b
602a16c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b712b2b
602a16c
 
b712b2b
 
 
602a16c
 
b712b2b
602a16c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import io
from contextlib import redirect_stdout

from smolagents import (
    CodeAgent, 
    LiteLLMModel, 
    InferenceClientModel,
    DuckDuckGoSearchTool, 
    VisitWebpageTool, 
    WikipediaSearchTool
)

from config import (
    USE_LOCAL_MODEL,
    OLLAMA_MODEL_ID, OLLAMA_API_BASE, OLLAMA_API_KEY,
    HF_MODEL_ID, HF_TOKEN,
    MAX_STEPS, VERBOSITY_LEVEL, AUTHORIZED_IMPORTS,
    QUESTION_TYPES
)
from tools import smart_visit, get_youtube_info
from utils import clean_answer, clean_ansi_codes


class EnhancedAgent:
    """Enhanced agent with question-type specific strategies."""
    
    def __init__(self):
        """Create the language-model backend and the tool-equipped CodeAgent.

        The backend is selected by ``USE_LOCAL_MODEL``: a ``LiteLLMModel``
        configured with the Ollama settings when it is truthy, otherwise an
        ``InferenceClientModel`` configured with the Hugging Face settings.
        The chosen model is stored on ``self.model`` and the agent on
        ``self.agent``.
        """
        print("   🤖 Initializing agent...")

        if not USE_LOCAL_MODEL:
            # Remote inference through the Hugging Face API.
            self.model = InferenceClientModel(model_id=HF_MODEL_ID, token=HF_TOKEN)
            print(f"   ☁️  Model: {HF_MODEL_ID} (HuggingFace)")
        else:
            # Local model reached through the configured Ollama endpoint.
            self.model = LiteLLMModel(
                model_id=OLLAMA_MODEL_ID,
                api_base=OLLAMA_API_BASE,
                api_key=OLLAMA_API_KEY,
            )
            print(f"   📦 Model: {OLLAMA_MODEL_ID} (local)")

        # Library-provided tools first, then the project's own helpers.
        toolbox = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            WikipediaSearchTool(),
            smart_visit,
            get_youtube_info,
        ]

        self.agent = CodeAgent(
            tools=toolbox,
            model=self.model,
            max_steps=MAX_STEPS,
            verbosity_level=VERBOSITY_LEVEL,
            additional_authorized_imports=AUTHORIZED_IMPORTS,
        )
    
    def build_prompt(self, question, local_file, question_type):
        """Construye prompt optimizado según el tipo de pregunta."""
        
        base_context = f"""TASK: You are solving a GAIA benchmark question. Be precise and methodical.

QUESTION: {question}
"""
        
        strategies = {
            QUESTION_TYPES['YOUTUBE_VIDEO']: """
STRATEGY - YouTube Video:
1. Extract the video ID from the URL in the question
2. Use get_youtube_info tool to get context
3. Search DuckDuckGo for: "[video_id] transcript" or "[video_id] [keywords_from_question]"
4. Look for Reddit threads, forums, or blogs discussing this video
5. Find the specific information requested

IMPORTANT: You CANNOT watch the video. Search for transcripts or discussions online.
""",
            
            QUESTION_TYPES['IMAGE_FILE']: f"""
STRATEGY - Image File:
1. File '{local_file}' is in current directory
2. You CANNOT read image files directly with Python
3. Search online for: "{local_file}" OR search for keywords from the question
4. Look for discussions, analysis, or descriptions of this image online
5. For chess positions: search "[piece positions] chess position solution"

IMPORTANT: Do NOT attempt cv2, PIL, or any image processing. Search online instead.
""",
            
            QUESTION_TYPES['AUDIO_FILE']: f"""
STRATEGY - Audio File:
1. File '{local_file}' is in current directory
2. You CANNOT play or transcribe audio with Python
3. Search online for: "{local_file}" OR the exact question text
4. Look for transcripts, Reddit threads, or forums discussing this audio

IMPORTANT: Do NOT attempt librosa, soundfile, or audio processing. Search online.
""",
            
            QUESTION_TYPES['DATA_FILE']: f"""
STRATEGY - Data File (Excel/CSV):
1. File '{local_file}' is in current directory
2. Use pandas to read: pd.read_excel('{local_file}') or pd.read_csv('{local_file}')
3. Explore columns with df.columns and df.head()
4. Filter and sum/count as needed
5. Double-check calculations

CODE TEMPLATE:
```python
import pandas as pd
df = pd.read_excel('{local_file}')  # or read_csv
print(df.columns)
print(df.head())
# ... your analysis
```
""",
            
            QUESTION_TYPES['CODE_FILE']: f"""
STRATEGY - Code File:
1. File '{local_file}' is in current directory
2. Read it with open('{local_file}', 'r').read()
3. Analyze the code logic carefully
4. If needed, execute it: exec(open('{local_file}').read())
5. Return the requested output

IMPORTANT: Read and understand before executing.
""",
            
            QUESTION_TYPES['WIKIPEDIA']: """
STRATEGY - Wikipedia Search:
1. Identify the exact topic/entity from the question
2. Use web_search to find the correct Wikipedia article URL
3. Use smart_visit to read the Wikipedia page content
4. Extract the specific information requested (dates, numbers, names, etc.)
5. For counting tasks: CREATE A PYTHON LIST with each item, then count with len()

TIPS:
- Search: "[topic] Wikipedia 2022" for latest version
- For discographies: look for "Discography" section or table
- For featured articles: search "Wikipedia Featured Article [topic] [date]"
- ALWAYS create a list and count programmatically, don't count manually
""",
            
            QUESTION_TYPES['COUNTING']: """
STRATEGY - Counting Task:
1. Research and LIST all items first (don't just count)
2. Use smart_visit to get complete data from Wikipedia or official sources
3. Store items in a Python list: items = []
4. Count with len(items) and verify manually
5. Double-check you haven't missed anything

IMPORTANT: First collect ALL items, THEN count. Show your work.
""",
            
            QUESTION_TYPES['TEXT_MANIPULATION']: """
STRATEGY - Text Manipulation:
1. Read the question VERY carefully
2. If text is backwards, reverse it: text[::-1]
3. If asking for opposite: use logic (left ↔ right, up ↔ down, etc.)
4. Return ONLY the answer, no explanation

EXAMPLE: ".rewsna eht sa 'tfel' drow..." 
→ Reverse to read: "...word 'left' as the answer."
→ Opposite of "left" is "right"
""",
            
            QUESTION_TYPES['GENERAL']: """
STRATEGY - General Research:
1. Break down the question into sub-tasks
2. Use web_search for initial research
3. Use smart_visit to read relevant pages in detail
4. Cross-reference multiple sources if needed
5. Extract the precise answer requested

TIPS:
- Be specific in searches: include years, full names, exact terms
- Read carefully - answers are often in tables, lists, or footnotes
"""
        }
        
        strategy = strategies.get(question_type, strategies[QUESTION_TYPES['GENERAL']])
        
        output_format = """
FINAL OUTPUT FORMAT:
Return ONLY the answer value. No markdown, no "The answer is", no explanations.

Examples of GOOD answers:
- "3"
- "right"
- "Ian Rose"
- "14.50"
- "d5, e2"

Examples of BAD answers:
- "The answer is 3"
- "**3**"
- "Based on my research, the answer is 3."
"""
        
        return base_context + strategy + output_format
    
    def solve(self, question, local_file=None, question_type=None):
        """
        Solve a question using an optimized strategy.
        
        Args:
            question: The question text
            local_file: Path to attached file (optional)
            question_type: Detected question type
            
        Returns:
            tuple: (answer, execution logs)
        """
        if question_type is None:
            question_type = QUESTION_TYPES['GENERAL']
            
        prompt = self.build_prompt(question, local_file, question_type)
        
        log_capture = io.StringIO()
        final_answer = "Error"
        
        try:
            with redirect_stdout(log_capture):
                answer = self.agent.run(prompt)
                final_answer = clean_answer(answer)
                
                # Si está vacío después de limpiar, buscar en logs
                if not final_answer or final_answer == "Error":
                    logs = log_capture.getvalue()
                    for line in reversed(logs.split('\n')):
                        if line.strip() and not any(x in line for x in ['===', '---', 'Step', 'Tool']):
                            potential_answer = line.strip()
                            if len(potential_answer) < 200:
                                final_answer = potential_answer
                                break
                
        except Exception as e:
            log_capture.write(f"\n❌ CRITICAL ERROR: {e}\n")
            final_answer = "Error"
        
        return final_answer, clean_ansi_codes(log_capture.getvalue())