Update app.py
Browse files
app.py
CHANGED
|
@@ -64,164 +64,228 @@ def youtube_captions(self, url):
|
|
| 64 |
from openai import OpenAI
|
| 65 |
|
| 66 |
class BasicAgent:
|
| 67 |
-
def __call__(self, question, file_url=None):
|
| 68 |
-
return self.agent_loop(question, file_url)
|
| 69 |
def __init__(self):
|
| 70 |
print("π Super GAIA Agent initialized")
|
| 71 |
self.client = OpenAI()
|
| 72 |
-
# Initialize Whisper model once to avoid reloading in the loop
|
| 73 |
self.audio_model = whisper.load_model("base")
|
| 74 |
-
def read_audio(self, file_url):
|
| 75 |
-
try:
|
| 76 |
-
r = requests.get(file_url, timeout=20)
|
| 77 |
-
with open("temp_audio.mp3", "wb") as f: f.write(r.content)
|
| 78 |
-
result = self.audio_model.transcribe("temp_audio.mp3")
|
| 79 |
-
return result
|
| 80 |
-
except Exception as e:
|
| 81 |
-
return f"Audio error: {str(e)}"
|
| 82 |
-
def download_file(self, url):
|
| 83 |
-
if not url or not url.startswith("http"):
|
| 84 |
-
return None
|
| 85 |
-
try:
|
| 86 |
-
r = requests.get(url, timeout=20)
|
| 87 |
-
file_name = url.split("/")[-1] or "temp_file"
|
| 88 |
-
with open(file_name, "wb") as f:
|
| 89 |
-
f.write(r.content)
|
| 90 |
-
return file_name
|
| 91 |
-
except Exception as e:
|
| 92 |
-
print(f"Download error: {e}")
|
| 93 |
-
return None
|
| 94 |
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
| 96 |
def wiki_search(self, query):
|
| 97 |
try:
|
| 98 |
query = query.strip(' ".,')
|
| 99 |
-
#
|
| 100 |
-
|
| 101 |
"https://en.wikipedia.org/w/api.php",
|
| 102 |
params={"action": "query", "list": "search", "srsearch": query,
|
| 103 |
-
"format": "json", "srlimit":
|
| 104 |
timeout=10
|
| 105 |
).json()
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
#
|
| 111 |
summary = requests.get(
|
| 112 |
-
f"https://en.wikipedia.org/api/rest_v1/page/summary/{
|
| 113 |
timeout=10
|
| 114 |
).json()
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
| 116 |
except Exception as e:
|
| 117 |
-
return f"Wiki error: {
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
try:
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
if tool == "read_image": return f"IMAGE_CONTENT: {pytesseract.image_to_string(Image.open(f'temp.{ext}'))}"
|
| 136 |
-
if tool == "read_excel": return f"EXCEL_DATA: {pd.read_excel(f'temp.{ext}').to_string()[:3000]}"
|
| 137 |
-
if tool == "read_audio": return f"TRANSCRIPT: {self.audio_model.transcribe(f'temp.{ext}')}"
|
| 138 |
-
|
| 139 |
-
if tool == "scrape_page":
|
| 140 |
-
soup = BeautifulSoup(requests.get(input_data, timeout=10).text, "html.parser")
|
| 141 |
-
return f"PAGE_TEXT: {soup.get_text()[:4000]}"
|
| 142 |
except Exception as e:
|
| 143 |
-
return f"
|
| 144 |
-
return f"Unknown tool: {tool}"
|
| 145 |
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
def agent_loop(self, question, file_url):
|
| 155 |
-
#
|
| 156 |
pre_context = ""
|
| 157 |
if file_url:
|
| 158 |
-
ext = file_url.split('.')[-1].lower()
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
pre_context =
|
| 162 |
-
elif ext in ['xlsx', 'xls']:
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
result = self.execute_tool("read_image", file_url, file_url)
|
| 167 |
-
pre_context = f"\nIMAGE TEXT: {result}"
|
| 168 |
elif ext == 'py':
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
| 172 |
memory = pre_context # seed memory with file content
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
TOOL
|
| 194 |
-
INPUT: tool_input
|
| 195 |
-
OR
|
| 196 |
-
FINAL: your_precise_answer"""
|
| 197 |
|
| 198 |
response = self.client.chat.completions.create(
|
| 199 |
-
model="gpt-4o",
|
| 200 |
-
#model ="gpt-5.2-chat-latest",
|
| 201 |
temperature=0,
|
| 202 |
-
messages=[
|
| 203 |
-
|
|
|
|
|
|
|
| 204 |
)
|
| 205 |
-
|
| 206 |
resp = response.choices[0].message.content.strip()
|
| 207 |
-
print(f"Step {step}: {resp}")
|
| 208 |
|
|
|
|
| 209 |
if "FINAL:" in resp:
|
| 210 |
return resp.split("FINAL:")[-1].strip()
|
| 211 |
|
| 212 |
-
#
|
| 213 |
t_match = re.search(r"TOOL:\s*(\w+)", resp, re.I)
|
| 214 |
-
i_match = re.search(r"INPUT:\s*(.
|
| 215 |
-
|
| 216 |
if t_match and i_match:
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
else:
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 227 |
"""
|
|
|
|
| 64 |
from openai import OpenAI
|
| 65 |
|
| 66 |
class BasicAgent:
    """Question-answering agent that drives an OpenAI chat model with tools.

    The agent repeatedly asks the model for either a tool call
    (``TOOL: name`` / ``INPUT: value``) or a final answer (``FINAL: text``),
    executes the requested tool, and feeds the result back to the model as
    accumulated text "memory". If a file URL accompanies the question, its
    content is loaded up front and used to seed that memory.
    """

    # File extensions recognised by the pre-load step in agent_loop.
    _AUDIO_EXTS = ("mp3", "wav", "ogg", "m4a", "flac")
    _TABLE_EXTS = ("xlsx", "xls", "csv")
    _IMAGE_EXTS = ("png", "jpg", "jpeg", "gif", "webp")

    _MODEL = "gpt-4o"
    _MAX_STEPS = 10

    # NOTE: "\\n" below is intentionally a literal backslash-n in the prompt
    # text (it describes the output format to the model), not a line break.
    _SYSTEM_PROMPT = (
        "You are a precise GAIA benchmark solver.\n"
        "Rules:\n"
        "- Always output exactly: TOOL: tool_name\\nINPUT: tool_input OR FINAL: answer\n"
        "- Never repeat a failed tool call with the same input — change the query or try a different tool\n"
        "- For math/logic questions: reason step by step then output FINAL\n"
        "- Botanical rule: vegetables are plant parts that are NOT fruits. Seeds inside = botanical fruit (tomato, pepper, corn, zucchini, green beans, peas, cucumber). Roots/stems/leaves = vegetable (carrot, celery, lettuce, broccoli, sweet potato). Basil = herb, not vegetable.\n"
        "- For reversed text: decode it completely before answering"
    )

    def __init__(self):
        """Create the OpenAI client and load the Whisper "base" model once."""
        # NOTE(review): the leading glyph looks like a mis-decoded emoji in
        # the copy this was taken from; confirm against the real file.
        print("π Super GAIA Agent initialized")
        self.client = OpenAI()
        self.audio_model = whisper.load_model("base")

    def __call__(self, question, file_url=None):
        """Answer ``question``; ``file_url`` optionally points at an attachment."""
        return self.agent_loop(question, file_url)

    # -- Helpers -----------------------------------------------------------
    @staticmethod
    def _file_extension(url):
        """Return the lower-cased extension of the URL's path, without the dot.

        Only the path component is inspected, so query strings and fragments
        (``img.png?v=1.2``) do not confuse the result. Returns ``""`` when the
        path has no extension.
        """
        import os
        from urllib.parse import urlparse

        path = urlparse(url.strip(' "')).path
        return os.path.splitext(path)[1].lstrip(".").lower()

    @staticmethod
    def _exec_python_source(code):
        """Execute ``code`` and return its captured stdout as a tool result.

        SECURITY: this runs arbitrary code in-process with full privileges.
        It is NOT sandboxed; only use it on sources you trust.

        Returns ``"PYTHON_OUTPUT: ..."`` (output truncated to 2000 chars), a
        placeholder when nothing was printed, or ``"Python exec error: ..."``
        if the code raised.
        """
        import contextlib
        import io

        captured = io.StringIO()
        try:
            with contextlib.redirect_stdout(captured):
                # Run as a script so `if __name__ == "__main__":` blocks fire;
                # with an empty globals dict __name__ resolves to "builtins".
                exec(code, {"__name__": "__main__"})
        except SystemExit:
            # sys.exit() inside the script is normal termination, not a
            # failure; keep whatever was printed before it.
            pass
        except Exception as e:
            return f"Python exec error: {e}"
        output = captured.getvalue()
        if output:
            return f"PYTHON_OUTPUT: {output[:2000]}"
        return "PYTHON_OUTPUT: (no print output)"

    def _chat(self, user_content):
        """Send one system+user exchange to the model and return its text."""
        response = self.client.chat.completions.create(
            model=self._MODEL,
            temperature=0,
            messages=[
                {"role": "system", "content": self._SYSTEM_PROMPT},
                {"role": "user", "content": user_content},
            ],
        )
        # content may be None (e.g. refusals); treat that as empty text.
        return (response.choices[0].message.content or "").strip()

    def _preload_file(self, file_url):
        """Load the attached file by extension and return its text, or ""."""
        ext = self._file_extension(file_url)
        print(f" [Pre-load] detected file ext={ext}, url={file_url}")
        if ext in self._AUDIO_EXTS:
            return self.read_audio(file_url)
        if ext in self._TABLE_EXTS:
            return self.read_excel(file_url)
        if ext in self._IMAGE_EXTS:
            return self.read_image(file_url)
        if ext == "py":
            try:
                return "PYTHON_CODE:\n" + requests.get(file_url, timeout=10).text[:3000]
            except requests.RequestException as e:
                # Best effort: the model can still call run_python later.
                print(f" [Pre-load] could not fetch Python source: {e}")
        return ""

    # -- TOOL: Wikipedia ---------------------------------------------------
    def wiki_search(self, query):
        """Return the summary extract of the best Wikipedia match for ``query``.

        Returns a ``"WIKI [title]: ..."`` string (extract truncated to 3000
        chars), or a human-readable message when nothing is found or a
        request fails. Never raises.
        """
        try:
            query = query.strip(' ".,')
            # 1. Find the best matching title.
            search = requests.get(
                "https://en.wikipedia.org/w/api.php",
                params={"action": "query", "list": "search", "srsearch": query,
                        "format": "json", "srlimit": 3},
                timeout=10,
            )
            search.raise_for_status()
            results = search.json().get("query", {}).get("search", [])
            if not results:
                return f"No Wikipedia results for: {query}"
            title = results[0]["title"]
            # 2. Fetch the summary via the REST API. safe="" also escapes "/"
            #    so titles like "AC/DC" stay a single path segment.
            encoded_title = requests.utils.quote(title, safe="")
            summary = requests.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}",
                timeout=10,
            )
            summary.raise_for_status()
            extract = summary.json().get("extract", "")
            if not extract:
                return f"No extract for: {title}"
            return f"WIKI [{title}]: {extract[:3000]}"
        except Exception as e:
            return f"Wiki error: {e}"

    # -- TOOL: Scrape web page ---------------------------------------------
    def scrape_page(self, url):
        """Fetch ``url`` and return its visible text (first 4000 chars).

        YouTube URLs are rejected up front without a network call. Never
        raises; failures come back as ``"Scrape error: ..."``.
        """
        url = url.strip(' "')
        # Block YouTube — it never returns useful content via scraping.
        if "youtube.com" in url or "youtu.be" in url:
            return "YouTube pages cannot be scraped. Use yt-dlp captions instead or search for video transcript online."
        try:
            page = requests.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
            page.raise_for_status()
            soup = BeautifulSoup(page.text, "html.parser")
            # Remove nav/footer/script noise before extracting text.
            for tag in soup(["script", "style", "nav", "footer", "header"]):
                tag.decompose()
            text = soup.get_text(separator=" ", strip=True)
            return f"PAGE [{url[:60]}]: {text[:4000]}"
        except Exception as e:
            return f"Scrape error: {e}"

    # -- TOOL: Read audio via Whisper --------------------------------------
    def read_audio(self, url):
        """Download the audio at ``url`` and return its Whisper transcript.

        Returns ``"TRANSCRIPT: ..."`` or ``"Audio error: ..."``. Never raises.
        """
        import os
        import tempfile

        try:
            url = url.strip(' "')
            r = requests.get(url, timeout=30)
            r.raise_for_status()
            suffix = self._file_extension(url) or "mp3"
            # A private temp dir avoids name clashes between concurrent calls
            # and guarantees the download is removed afterwards.
            with tempfile.TemporaryDirectory() as tmp_dir:
                audio_path = os.path.join(tmp_dir, f"audio.{suffix}")
                with open(audio_path, "wb") as f:
                    f.write(r.content)
                result = self.audio_model.transcribe(audio_path)
            return f"TRANSCRIPT: {result['text']}"
        except Exception as e:
            return f"Audio error: {e}"

    # -- TOOL: Read Excel / CSV --------------------------------------------
    def read_excel(self, url):
        """Download a spreadsheet and return it rendered as text.

        ``.csv`` URLs are parsed with ``pd.read_csv``; everything else goes
        through ``pd.read_excel``. Returns ``"EXCEL_DATA:\\n..."`` (truncated
        to 4000 chars) or ``"Excel error: ..."``. Never raises.
        """
        import io

        try:
            url = url.strip(' "')
            r = requests.get(url, timeout=20)
            r.raise_for_status()
            buffer = io.BytesIO(r.content)
            if self._file_extension(url) == "csv":
                df = pd.read_csv(buffer)
            else:
                df = pd.read_excel(buffer)
            return f"EXCEL_DATA:\n{df.to_string()[:4000]}"
        except Exception as e:
            return f"Excel error: {e}"

    # -- TOOL: Read image via OCR ------------------------------------------
    def read_image(self, url):
        """Download an image and return the text pytesseract finds in it.

        Returns ``"IMAGE_TEXT: ..."`` (truncated to 3000 chars), a placeholder
        when OCR finds nothing, or ``"Image error: ..."``. Never raises.
        """
        import io

        try:
            url = url.strip(' "')
            r = requests.get(url, timeout=20)
            r.raise_for_status()
            # Decode from memory: no temp file, and no dependence on the URL
            # carrying a usable file extension.
            img = Image.open(io.BytesIO(r.content))
            text = pytesseract.image_to_string(img)
            if text.strip():
                return f"IMAGE_TEXT: {text[:3000]}"
            return "IMAGE_TEXT: (no text detected by OCR)"
        except Exception as e:
            return f"Image error: {e}"

    # -- TOOL: Execute Python code -----------------------------------------
    def run_python(self, url):
        """Download the Python source at ``url``, run it, return its stdout.

        SECURITY: the downloaded code is executed in-process without any
        sandboxing (see ``_exec_python_source``).
        """
        try:
            url = url.strip(' "')
            r = requests.get(url, timeout=15)
            r.raise_for_status()
        except Exception as e:
            return f"Python exec error: {e}"
        return self._exec_python_source(r.text)

    # -- Route tool calls --------------------------------------------------
    def execute_tool(self, tool, input_data, file_url):
        """Dispatch a tool call by name and return the tool's text result.

        ``wiki_search`` receives ``input_data`` verbatim. Every other tool
        receives a URL: ``input_data`` if it starts with ``http``, otherwise
        ``file_url`` when one is available. Unknown names yield
        ``"Unknown tool: <name>"``.
        """
        if tool == "wiki_search":
            return self.wiki_search(input_data)

        # Use file_url as fallback when input_data has no URL.
        target = input_data.strip(' "')
        if not target.startswith("http") and file_url:
            target = file_url

        url_tools = {
            "scrape_page": self.scrape_page,
            "read_audio": self.read_audio,
            "read_excel": self.read_excel,
            "read_image": self.read_image,
            "run_python": self.run_python,
        }
        handler = url_tools.get(tool)
        if handler is None:
            return f"Unknown tool: {tool}"
        return handler(target)

    # -- Main agent loop ---------------------------------------------------
    def agent_loop(self, question, file_url):
        """Run the tool-use loop and return the model's final answer text.

        Up to ``_MAX_STEPS`` rounds are attempted. If no ``FINAL:`` answer
        appears by then, the model is asked once more for its best answer
        from the accumulated memory.
        """
        # Pre-load: handle file-based questions before the loop, and seed
        # memory with the file content.
        memory = self._preload_file(file_url) if file_url else ""

        for step in range(self._MAX_STEPS):
            # Build prompt with all context gathered so far.
            prompt = (
                f"FILE_URL: {file_url if file_url else 'None'}\n"
                "\n"
                f"QUESTION: {question}\n"
                "\n"
                "ACCUMULATED KNOWLEDGE:\n"
                f"{memory if memory else '(none yet)'}\n"
                "\n"
                "AVAILABLE TOOLS: wiki_search, scrape_page, read_audio, read_excel, read_image, run_python\n"
                "\n"
                "What is your next action? Output TOOL+INPUT or FINAL:"
            )
            resp = self._chat(prompt)
            print(f" Step {step}: {resp[:120]}")

            # Check for final answer.
            if "FINAL:" in resp:
                return resp.split("FINAL:")[-1].strip()

            # Parse tool call.
            t_match = re.search(r"TOOL:\s*(\w+)", resp, re.I)
            i_match = re.search(r"INPUT:\s*(.+)", resp, re.I | re.DOTALL)

            if not (t_match and i_match):
                # Model gave a thought without a tool call — keep it as reasoning.
                memory += f"\n\n[Step {step} - Reasoning]: {resp[:300]}"
                continue

            tool_name = t_match.group(1).lower().strip()
            tool_input = i_match.group(1).strip().split('\n')[0]  # first line only

            result = self.execute_tool(tool_name, tool_input, file_url)
            print(f" [{tool_name}] → {result[:100]}")

            # Only add useful results to memory (skip empty/error loops).
            if len(result) > 30 and "error" not in result.lower()[:20]:
                memory += f"\n\n[Step {step} - {tool_name}({tool_input[:50]})]\n{result[:1500]}"
            else:
                # Tool failed — tell the model so it tries something different.
                memory += f"\n\n[Step {step} - {tool_name} FAILED: {result[:200]}. Try a different approach.]"

        # Fallback: ask the model to give its best answer from what it has.
        resp = self._chat(
            f"Based on everything gathered, give your best FINAL answer.\nQUESTION: {question}\nKNOWLEDGE:\n{memory}"
        )
        if "FINAL:" in resp:
            return resp.split("FINAL:")[-1].strip()
        return resp
|
| 289 |
|
| 290 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 291 |
"""
|