Spaces:
Sleeping
Sleeping
File size: 8,282 Bytes
602a16c b712b2b 602a16c b712b2b 602a16c b712b2b 602a16c b712b2b 602a16c b712b2b 602a16c b712b2b 602a16c b712b2b 602a16c b712b2b 602a16c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 |
import io
from contextlib import redirect_stdout
from smolagents import (
CodeAgent,
LiteLLMModel,
InferenceClientModel,
DuckDuckGoSearchTool,
VisitWebpageTool,
WikipediaSearchTool
)
from config import (
USE_LOCAL_MODEL,
OLLAMA_MODEL_ID, OLLAMA_API_BASE, OLLAMA_API_KEY,
HF_MODEL_ID, HF_TOKEN,
MAX_STEPS, VERBOSITY_LEVEL, AUTHORIZED_IMPORTS,
QUESTION_TYPES
)
from tools import smart_visit, get_youtube_info
from utils import clean_answer, clean_ansi_codes
class EnhancedAgent:
"""Enhanced agent with question-type specific strategies."""
def __init__(self):
print(f" 🤖 Initializing agent...")
if USE_LOCAL_MODEL:
# Usar Ollama local
self.model = LiteLLMModel(
model_id=OLLAMA_MODEL_ID,
api_base=OLLAMA_API_BASE,
api_key=OLLAMA_API_KEY
)
print(f" 📦 Model: {OLLAMA_MODEL_ID} (local)")
else:
# Use HuggingFace API
self.model = InferenceClientModel(
model_id=HF_MODEL_ID,
token=HF_TOKEN
)
print(f" ☁️ Model: {HF_MODEL_ID} (HuggingFace)")
search_tool = DuckDuckGoSearchTool()
visit_tool = VisitWebpageTool()
wiki_tool = WikipediaSearchTool()
self.agent = CodeAgent(
tools=[search_tool, visit_tool, wiki_tool, smart_visit, get_youtube_info],
model=self.model,
max_steps=MAX_STEPS,
verbosity_level=VERBOSITY_LEVEL,
additional_authorized_imports=AUTHORIZED_IMPORTS
)
def build_prompt(self, question, local_file, question_type):
"""Construye prompt optimizado según el tipo de pregunta."""
base_context = f"""TASK: You are solving a GAIA benchmark question. Be precise and methodical.
QUESTION: {question}
"""
strategies = {
QUESTION_TYPES['YOUTUBE_VIDEO']: """
STRATEGY - YouTube Video:
1. Extract the video ID from the URL in the question
2. Use get_youtube_info tool to get context
3. Search DuckDuckGo for: "[video_id] transcript" or "[video_id] [keywords_from_question]"
4. Look for Reddit threads, forums, or blogs discussing this video
5. Find the specific information requested
IMPORTANT: You CANNOT watch the video. Search for transcripts or discussions online.
""",
QUESTION_TYPES['IMAGE_FILE']: f"""
STRATEGY - Image File:
1. File '{local_file}' is in current directory
2. You CANNOT read image files directly with Python
3. Search online for: "{local_file}" OR search for keywords from the question
4. Look for discussions, analysis, or descriptions of this image online
5. For chess positions: search "[piece positions] chess position solution"
IMPORTANT: Do NOT attempt cv2, PIL, or any image processing. Search online instead.
""",
QUESTION_TYPES['AUDIO_FILE']: f"""
STRATEGY - Audio File:
1. File '{local_file}' is in current directory
2. You CANNOT play or transcribe audio with Python
3. Search online for: "{local_file}" OR the exact question text
4. Look for transcripts, Reddit threads, or forums discussing this audio
IMPORTANT: Do NOT attempt librosa, soundfile, or audio processing. Search online.
""",
QUESTION_TYPES['DATA_FILE']: f"""
STRATEGY - Data File (Excel/CSV):
1. File '{local_file}' is in current directory
2. Use pandas to read: pd.read_excel('{local_file}') or pd.read_csv('{local_file}')
3. Explore columns with df.columns and df.head()
4. Filter and sum/count as needed
5. Double-check calculations
CODE TEMPLATE:
```python
import pandas as pd
df = pd.read_excel('{local_file}') # or read_csv
print(df.columns)
print(df.head())
# ... your analysis
```
""",
QUESTION_TYPES['CODE_FILE']: f"""
STRATEGY - Code File:
1. File '{local_file}' is in current directory
2. Read it with open('{local_file}', 'r').read()
3. Analyze the code logic carefully
4. If needed, execute it: exec(open('{local_file}').read())
5. Return the requested output
IMPORTANT: Read and understand before executing.
""",
QUESTION_TYPES['WIKIPEDIA']: """
STRATEGY - Wikipedia Search:
1. Identify the exact topic/entity from the question
2. Use web_search to find the correct Wikipedia article URL
3. Use smart_visit to read the Wikipedia page content
4. Extract the specific information requested (dates, numbers, names, etc.)
5. For counting tasks: CREATE A PYTHON LIST with each item, then count with len()
TIPS:
- Search: "[topic] Wikipedia 2022" for latest version
- For discographies: look for "Discography" section or table
- For featured articles: search "Wikipedia Featured Article [topic] [date]"
- ALWAYS create a list and count programmatically, don't count manually
""",
QUESTION_TYPES['COUNTING']: """
STRATEGY - Counting Task:
1. Research and LIST all items first (don't just count)
2. Use smart_visit to get complete data from Wikipedia or official sources
3. Store items in a Python list: items = []
4. Count with len(items) and verify manually
5. Double-check you haven't missed anything
IMPORTANT: First collect ALL items, THEN count. Show your work.
""",
QUESTION_TYPES['TEXT_MANIPULATION']: """
STRATEGY - Text Manipulation:
1. Read the question VERY carefully
2. If text is backwards, reverse it: text[::-1]
3. If asking for opposite: use logic (left ↔ right, up ↔ down, etc.)
4. Return ONLY the answer, no explanation
EXAMPLE: ".rewsna eht sa 'tfel' drow..."
→ Reverse to read: "...word 'left' as the answer."
→ Opposite of "left" is "right"
""",
QUESTION_TYPES['GENERAL']: """
STRATEGY - General Research:
1. Break down the question into sub-tasks
2. Use web_search for initial research
3. Use smart_visit to read relevant pages in detail
4. Cross-reference multiple sources if needed
5. Extract the precise answer requested
TIPS:
- Be specific in searches: include years, full names, exact terms
- Read carefully - answers are often in tables, lists, or footnotes
"""
}
strategy = strategies.get(question_type, strategies[QUESTION_TYPES['GENERAL']])
output_format = """
FINAL OUTPUT FORMAT:
Return ONLY the answer value. No markdown, no "The answer is", no explanations.
Examples of GOOD answers:
- "3"
- "right"
- "Ian Rose"
- "14.50"
- "d5, e2"
Examples of BAD answers:
- "The answer is 3"
- "**3**"
- "Based on my research, the answer is 3."
"""
return base_context + strategy + output_format
def solve(self, question, local_file=None, question_type=None):
"""
Solve a question using an optimized strategy.
Args:
question: The question text
local_file: Path to attached file (optional)
question_type: Detected question type
Returns:
tuple: (answer, execution logs)
"""
if question_type is None:
question_type = QUESTION_TYPES['GENERAL']
prompt = self.build_prompt(question, local_file, question_type)
log_capture = io.StringIO()
final_answer = "Error"
try:
with redirect_stdout(log_capture):
answer = self.agent.run(prompt)
final_answer = clean_answer(answer)
# Si está vacío después de limpiar, buscar en logs
if not final_answer or final_answer == "Error":
logs = log_capture.getvalue()
for line in reversed(logs.split('\n')):
if line.strip() and not any(x in line for x in ['===', '---', 'Step', 'Tool']):
potential_answer = line.strip()
if len(potential_answer) < 200:
final_answer = potential_answer
break
except Exception as e:
log_capture.write(f"\n❌ CRITICAL ERROR: {e}\n")
final_answer = "Error"
return final_answer, clean_ansi_codes(log_capture.getvalue())
|