Spaces:
Sleeping
Sleeping
fixing errors
Browse files- README.md +2 -1
- __pycache__/agent.cpython-312.pyc +0 -0
- __pycache__/answer_normalize.cpython-312.pyc +0 -0
- agent.py +153 -37
- answer_normalize.py +16 -1
- tools/__pycache__/gaia_deterministic.cpython-312.pyc +0 -0
- tools/__pycache__/registry.cpython-312.pyc +0 -0
- tools/gaia_deterministic.py +16 -6
- tools/registry.py +4 -1
README.md
CHANGED
|
@@ -36,7 +36,8 @@ This folder is a **drop-in replacement** for the course Space
|
|
| 36 |
- `GAIA_ASR_MODEL` — HF-only default `openai/whisper-large-v3`
|
| 37 |
- `GAIA_VISION_MODEL` — HF-only default `meta-llama/Llama-3.2-11B-Vision-Instruct`
|
| 38 |
- `GAIA_API_URL` — default `https://agents-course-unit4-scoring.hf.space`
|
| 39 |
-
- `GAIA_USE_CACHE` — `1` (default) or `0` to disable `gaia_answers_cache.json`
|
|
|
|
| 40 |
|
| 41 |
Keep the Space **public** so `agent_code` (`…/tree/main`) verifies for the leaderboard.
|
| 42 |
|
|
|
|
| 36 |
- `GAIA_ASR_MODEL` — HF-only default `openai/whisper-large-v3`
|
| 37 |
- `GAIA_VISION_MODEL` — HF-only default `meta-llama/Llama-3.2-11B-Vision-Instruct`
|
| 38 |
- `GAIA_API_URL` — default `https://agents-course-unit4-scoring.hf.space`
|
| 39 |
+
- `GAIA_USE_CACHE` — `1` (default) or `0` to disable `gaia_answers_cache.json` (set **`0`** once after changing the agent so old wrong answers are not resubmitted).
|
| 40 |
+
- **Groq free-tier TPM (413 “request too large”)**: the agent truncates tool outputs and total context. Tune with `GAIA_GROQ_MAX_TOOL_CHARS` (default `3200`), `GAIA_GROQ_CONTEXT_CHARS` (default `26000`), and `GAIA_AUTO_TRANSCRIPT_CHARS` (default `12000`; caps automatically inlined audio transcripts for `.mp3`, `.wav`, `.m4a`, `.ogg`, `.flac`, and `.webm` attachments).
|
| 41 |
|
| 42 |
Keep the Space **public** so `agent_code` (`…/tree/main`) verifies for the leaderboard.
|
| 43 |
|
__pycache__/agent.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/agent.cpython-312.pyc and b/__pycache__/agent.cpython-312.pyc differ
|
|
|
__pycache__/answer_normalize.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/answer_normalize.cpython-312.pyc and b/__pycache__/answer_normalize.cpython-312.pyc differ
|
|
|
agent.py
CHANGED
|
@@ -3,6 +3,8 @@
|
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import os
|
|
|
|
|
|
|
| 6 |
from typing import Any, Optional
|
| 7 |
|
| 8 |
from answer_normalize import normalize_answer
|
|
@@ -15,6 +17,7 @@ from llm_backends import (
|
|
| 15 |
make_openai_sdk_client,
|
| 16 |
openai_chat_model,
|
| 17 |
)
|
|
|
|
| 18 |
from tools.registry import TOOL_DEFINITIONS, deterministic_attempt, dispatch_tool
|
| 19 |
|
| 20 |
try:
|
|
@@ -27,6 +30,8 @@ SYSTEM_PROMPT = """You solve GAIA benchmark questions for the Hugging Face Agent
|
|
| 27 |
Hard rules:
|
| 28 |
- Call tools as needed (search, Wikipedia, fetch URL, Python, audio, image, Excel).
|
| 29 |
- Your final assistant message must contain ONLY the answer text required by the question — no labels like "FINAL ANSWER", no markdown fences, no extra sentences, no preamble.
|
|
|
|
|
|
|
| 30 |
- Match the question's format exactly: comma-separated lists alphabetized when asked; numbers without commas/thousands separators and without $ or % unless the question asks; short strings without leading articles (a/the); city names spelled out as requested; algebraic chess notation when asked.
|
| 31 |
- For English Wikipedia tasks, use wikipedia_* tools and verify years/names against retrieved text.
|
| 32 |
- For YouTube URLs, try youtube_transcript first; if missing, say you cannot access video and avoid guessing.
|
|
@@ -34,13 +39,77 @@ Hard rules:
|
|
| 34 |
"""
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
class GaiaAgent:
|
| 38 |
def __init__(
|
| 39 |
self,
|
| 40 |
*,
|
| 41 |
hf_token: Optional[str] = None,
|
| 42 |
text_model: Optional[str] = None,
|
| 43 |
-
max_iterations: int =
|
| 44 |
):
|
| 45 |
self.hf_token = (
|
| 46 |
hf_token
|
|
@@ -76,6 +145,34 @@ class GaiaAgent:
|
|
| 76 |
self._hf_client = InferenceClient(**kw)
|
| 77 |
return self._hf_client
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
def __call__(
|
| 80 |
self,
|
| 81 |
question: str,
|
|
@@ -93,53 +190,44 @@ class GaiaAgent:
|
|
| 93 |
)
|
| 94 |
|
| 95 |
user_text = _build_user_payload(question, attachment_path, task_id)
|
|
|
|
|
|
|
| 96 |
messages: list[dict[str, Any]] = [
|
| 97 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 98 |
{"role": "user", "content": user_text},
|
| 99 |
]
|
| 100 |
|
| 101 |
last_text = ""
|
|
|
|
| 102 |
|
| 103 |
for _ in range(self.max_iterations):
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
temperature=0.15,
|
| 125 |
-
)
|
| 126 |
-
msg = completion.choices[0].message
|
| 127 |
-
except Exception as e:
|
| 128 |
-
es = str(e)
|
| 129 |
-
if "402" in es or "Payment Required" in es or "depleted" in es.lower():
|
| 130 |
-
last_text = (
|
| 131 |
-
"Error: Hugging Face Inference credits exhausted (402). "
|
| 132 |
-
"Set Space secret GROQ_API_KEY (free at https://console.groq.com) "
|
| 133 |
-
"to use Groq instead, or add HF billing."
|
| 134 |
-
)
|
| 135 |
-
else:
|
| 136 |
-
last_text = f"Inference error: {e}"
|
| 137 |
-
break
|
| 138 |
|
|
|
|
| 139 |
last_text = (msg.content or "").strip()
|
| 140 |
tool_calls = getattr(msg, "tool_calls", None)
|
| 141 |
|
| 142 |
if tool_calls:
|
|
|
|
| 143 |
messages.append(
|
| 144 |
{
|
| 145 |
"role": "assistant",
|
|
@@ -161,11 +249,13 @@ class GaiaAgent:
|
|
| 161 |
name = tc.function.name
|
| 162 |
args = tc.function.arguments or "{}"
|
| 163 |
result = dispatch_tool(name, args, hf_token=self.hf_token)
|
|
|
|
|
|
|
| 164 |
messages.append(
|
| 165 |
{
|
| 166 |
"role": "tool",
|
| 167 |
"tool_call_id": tc.id,
|
| 168 |
-
"content": result
|
| 169 |
}
|
| 170 |
)
|
| 171 |
continue
|
|
@@ -191,7 +281,33 @@ def _build_user_payload(
|
|
| 191 |
parts.append(f"task_id: {task_id}")
|
| 192 |
parts.append(f"Question:\n{question.strip()}")
|
| 193 |
if attachment_path:
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
else:
|
| 196 |
parts.append("\nNo attachment.")
|
| 197 |
return "\n".join(parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import os
|
| 6 |
+
import time
|
| 7 |
+
from pathlib import Path
|
| 8 |
from typing import Any, Optional
|
| 9 |
|
| 10 |
from answer_normalize import normalize_answer
|
|
|
|
| 17 |
make_openai_sdk_client,
|
| 18 |
openai_chat_model,
|
| 19 |
)
|
| 20 |
+
from tools.media_tools import transcribe_audio
|
| 21 |
from tools.registry import TOOL_DEFINITIONS, deterministic_attempt, dispatch_tool
|
| 22 |
|
| 23 |
try:
|
|
|
|
| 30 |
Hard rules:
|
| 31 |
- Call tools as needed (search, Wikipedia, fetch URL, Python, audio, image, Excel).
|
| 32 |
- Your final assistant message must contain ONLY the answer text required by the question — no labels like "FINAL ANSWER", no markdown fences, no extra sentences, no preamble.
|
| 33 |
+
- Never type fake tool calls such as <web_search>...</function>; the platform invokes tools for you. If you need search, emit a real tool call via the API, not XML-like text in the reply.
|
| 34 |
+
- When the user message includes an attachment path: for audio, a transcript may already be inlined — use it. For images (png/jpg), call analyze_image with that exact file_path. For .xlsx/.py use the appropriate tools with that path.
|
| 35 |
- Match the question's format exactly: comma-separated lists alphabetized when asked; numbers without commas/thousands separators and without $ or % unless the question asks; short strings without leading articles (a/the); city names spelled out as requested; algebraic chess notation when asked.
|
| 36 |
- For English Wikipedia tasks, use wikipedia_* tools and verify years/names against retrieved text.
|
| 37 |
- For YouTube URLs, try youtube_transcript first; if missing, say you cannot access video and avoid guessing.
|
|
|
|
| 39 |
"""
|
| 40 |
|
| 41 |
|
| 42 |
+
def _tool_char_cap(backend: str, *, shrink_pass: int = 0) -> int:
|
| 43 |
+
if backend == "groq":
|
| 44 |
+
base = int(os.environ.get("GAIA_GROQ_MAX_TOOL_CHARS", "3200"))
|
| 45 |
+
elif backend == "openai":
|
| 46 |
+
base = int(os.environ.get("GAIA_OPENAI_MAX_TOOL_CHARS", "12000"))
|
| 47 |
+
else:
|
| 48 |
+
base = int(os.environ.get("GAIA_MAX_TOOL_CHARS", "24000"))
|
| 49 |
+
if shrink_pass > 0:
|
| 50 |
+
base = max(600, base // (2**shrink_pass))
|
| 51 |
+
return base
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _groq_context_budget() -> int:
|
| 55 |
+
return int(os.environ.get("GAIA_GROQ_CONTEXT_CHARS", "26000"))
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _maybe_retryable_llm_error(exc: Exception) -> bool:
|
| 59 |
+
es = str(exc).lower()
|
| 60 |
+
return (
|
| 61 |
+
"413" in es
|
| 62 |
+
or "429" in es
|
| 63 |
+
or "rate_limit" in es
|
| 64 |
+
or "tokens per minute" in es
|
| 65 |
+
or "tpm" in es
|
| 66 |
+
or "too many tokens" in es
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _truncate_tool_messages(
    messages: list[dict[str, Any]],
    backend: str,
    *,
    shrink_pass: int = 0,
) -> None:
    """Shorten oversized tool outputs in ``messages`` in place.

    Every message whose role is ``"tool"`` and whose string content is longer
    than the cap from ``_tool_char_cap`` is cut to that cap and given a
    ``[truncated]`` marker line. Other messages are left untouched.

    Args:
        messages: Conversation history; mutated in place.
        backend: Backend identifier forwarded to ``_tool_char_cap``.
        shrink_pass: Retry counter forwarded to ``_tool_char_cap``.
    """
    limit = _tool_char_cap(backend, shrink_pass=shrink_pass)
    tool_messages = (msg for msg in messages if msg.get("role") == "tool")
    for msg in tool_messages:
        body = msg.get("content")
        if not isinstance(body, str) or len(body) <= limit:
            continue
        msg["content"] = body[:limit] + "\n[truncated]"
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _enforce_context_budget(messages: list[dict[str, Any]], backend: str) -> None:
|
| 86 |
+
if backend != "groq":
|
| 87 |
+
return
|
| 88 |
+
budget = _groq_context_budget()
|
| 89 |
+
for _ in range(24):
|
| 90 |
+
total = sum(len(str(m.get("content") or "")) for m in messages)
|
| 91 |
+
if total <= budget:
|
| 92 |
+
return
|
| 93 |
+
trimmed = False
|
| 94 |
+
for m in messages[2:]:
|
| 95 |
+
if m.get("role") != "tool":
|
| 96 |
+
continue
|
| 97 |
+
c = m.get("content")
|
| 98 |
+
if isinstance(c, str) and len(c) > 800:
|
| 99 |
+
m["content"] = c[: max(600, len(c) * 2 // 3)] + "\n[truncated]"
|
| 100 |
+
trimmed = True
|
| 101 |
+
break
|
| 102 |
+
if not trimmed:
|
| 103 |
+
break
|
| 104 |
+
|
| 105 |
+
|
| 106 |
class GaiaAgent:
|
| 107 |
def __init__(
|
| 108 |
self,
|
| 109 |
*,
|
| 110 |
hf_token: Optional[str] = None,
|
| 111 |
text_model: Optional[str] = None,
|
| 112 |
+
max_iterations: int = 12,
|
| 113 |
):
|
| 114 |
self.hf_token = (
|
| 115 |
hf_token
|
|
|
|
| 145 |
self._hf_client = InferenceClient(**kw)
|
| 146 |
return self._hf_client
|
| 147 |
|
| 148 |
+
def _chat_round(
    self,
    messages: list[dict[str, Any]],
    *,
    shrink_pass: int = 0,
) -> Any:
    """Shrink the conversation, then request one chat completion.

    Tool outputs are capped and the context budget is enforced on
    ``messages`` before the request goes out. The ``"groq"`` and ``"openai"``
    backends go through ``chat_complete_openai`` with a 768-token reply
    limit; every other backend uses the Hugging Face client with
    ``tool_choice="auto"`` and a 1024-token limit.

    Args:
        messages: Conversation history; may be truncated in place.
        shrink_pass: Retry counter forwarded to the tool-output truncation.

    Returns:
        The completion object returned by the selected client.
    """
    _truncate_tool_messages(messages, self.backend, shrink_pass=shrink_pass)
    _enforce_context_budget(messages, self.backend)

    uses_openai_sdk = self.backend in ("groq", "openai")
    if not uses_openai_sdk:
        hf_client = self._get_hf_client()
        return hf_client.chat_completion(
            messages=messages,
            model=self.text_model,
            tools=TOOL_DEFINITIONS,
            tool_choice="auto",
            max_tokens=1024,
            temperature=0.15,
        )

    assert self._oa_client is not None
    return chat_complete_openai(
        self._oa_client,
        model=self.text_model,
        messages=messages,
        tools=TOOL_DEFINITIONS,
        max_tokens=768,
        temperature=0.15,
    )
|
| 175 |
+
|
| 176 |
def __call__(
|
| 177 |
self,
|
| 178 |
question: str,
|
|
|
|
| 190 |
)
|
| 191 |
|
| 192 |
user_text = _build_user_payload(question, attachment_path, task_id)
|
| 193 |
+
user_text += _maybe_inline_audio_transcript(attachment_path, self.hf_token)
|
| 194 |
+
|
| 195 |
messages: list[dict[str, Any]] = [
|
| 196 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 197 |
{"role": "user", "content": user_text},
|
| 198 |
]
|
| 199 |
|
| 200 |
last_text = ""
|
| 201 |
+
retry_delays = (2.0, 6.0, 14.0)
|
| 202 |
|
| 203 |
for _ in range(self.max_iterations):
|
| 204 |
+
completion = None
|
| 205 |
+
shrink_pass = 0
|
| 206 |
+
for attempt in range(len(retry_delays) + 1):
|
| 207 |
+
try:
|
| 208 |
+
completion = self._chat_round(messages, shrink_pass=shrink_pass)
|
| 209 |
+
break
|
| 210 |
+
except Exception as e:
|
| 211 |
+
es = str(e)
|
| 212 |
+
if "402" in es or "Payment Required" in es or "depleted" in es.lower():
|
| 213 |
+
last_text = (
|
| 214 |
+
"Error: Hugging Face Inference credits exhausted (402). "
|
| 215 |
+
"Set Space secret GROQ_API_KEY (free at https://console.groq.com) "
|
| 216 |
+
"to use Groq instead, or add HF billing."
|
| 217 |
+
)
|
| 218 |
+
return normalize_answer(last_text)
|
| 219 |
+
if attempt < len(retry_delays) and _maybe_retryable_llm_error(e):
|
| 220 |
+
shrink_pass = attempt + 1
|
| 221 |
+
time.sleep(retry_delays[attempt])
|
| 222 |
+
continue
|
| 223 |
+
return normalize_answer(f"Inference error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
+
msg = completion.choices[0].message
|
| 226 |
last_text = (msg.content or "").strip()
|
| 227 |
tool_calls = getattr(msg, "tool_calls", None)
|
| 228 |
|
| 229 |
if tool_calls:
|
| 230 |
+
cap = _tool_char_cap(self.backend, shrink_pass=0)
|
| 231 |
messages.append(
|
| 232 |
{
|
| 233 |
"role": "assistant",
|
|
|
|
| 249 |
name = tc.function.name
|
| 250 |
args = tc.function.arguments or "{}"
|
| 251 |
result = dispatch_tool(name, args, hf_token=self.hf_token)
|
| 252 |
+
if isinstance(result, str) and len(result) > cap:
|
| 253 |
+
result = result[:cap] + "\n[truncated]"
|
| 254 |
messages.append(
|
| 255 |
{
|
| 256 |
"role": "tool",
|
| 257 |
"tool_call_id": tc.id,
|
| 258 |
+
"content": result,
|
| 259 |
}
|
| 260 |
)
|
| 261 |
continue
|
|
|
|
| 281 |
parts.append(f"task_id: {task_id}")
|
| 282 |
parts.append(f"Question:\n{question.strip()}")
|
| 283 |
if attachment_path:
|
| 284 |
+
p = Path(attachment_path)
|
| 285 |
+
parts.append(
|
| 286 |
+
f"\nAttachment path (pass this exact string to tools): {attachment_path}"
|
| 287 |
+
)
|
| 288 |
+
if p.is_file():
|
| 289 |
+
parts.append(f"Attachment exists on disk: yes ({p.name})")
|
| 290 |
+
else:
|
| 291 |
+
parts.append("Attachment exists on disk: NO — report that you cannot read it.")
|
| 292 |
else:
|
| 293 |
parts.append("\nNo attachment.")
|
| 294 |
return "\n".join(parts)
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def _maybe_inline_audio_transcript(
|
| 298 |
+
attachment_path: Optional[str],
|
| 299 |
+
hf_token: Optional[str],
|
| 300 |
+
) -> str:
|
| 301 |
+
if not attachment_path:
|
| 302 |
+
return ""
|
| 303 |
+
p = Path(attachment_path)
|
| 304 |
+
if not p.is_file():
|
| 305 |
+
return ""
|
| 306 |
+
ext = p.suffix.lower()
|
| 307 |
+
if ext not in (".mp3", ".wav", ".m4a", ".ogg", ".flac", ".webm"):
|
| 308 |
+
return ""
|
| 309 |
+
tx = transcribe_audio(str(p), hf_token=hf_token)
|
| 310 |
+
if not tx or tx.lower().startswith(("error", "asr error")):
|
| 311 |
+
return f"\n\n[Automatic transcription failed: {tx[:500]}]\n"
|
| 312 |
+
cap = int(os.environ.get("GAIA_AUTO_TRANSCRIPT_CHARS", "12000"))
|
| 313 |
+
return f"\n\n[Audio transcript — use for your answer]\n{tx[:cap]}\n"
|
answer_normalize.py
CHANGED
|
@@ -8,6 +8,11 @@ _FINAL_ANSWER_RE = re.compile(
|
|
| 8 |
r"^\s*(?:FINAL\s*ANSWER\s*[::]?\s*)",
|
| 9 |
re.IGNORECASE,
|
| 10 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
def normalize_answer(raw: Union[str, int, float, None]) -> Union[str, int, float]:
|
|
@@ -21,6 +26,7 @@ def normalize_answer(raw: Union[str, int, float, None]) -> Union[str, int, float
|
|
| 21 |
text = str(raw).strip()
|
| 22 |
if not text:
|
| 23 |
return ""
|
|
|
|
| 24 |
text = _FINAL_ANSWER_RE.sub("", text, count=1).strip()
|
| 25 |
# Strip common wrappers (single line)
|
| 26 |
for prefix in ("The answer is", "Answer:", "ANSWER:", "```", "`"):
|
|
@@ -31,7 +37,16 @@ def normalize_answer(raw: Union[str, int, float, None]) -> Union[str, int, float
|
|
| 31 |
if text.startswith("```"):
|
| 32 |
text = re.sub(r"^```\w*\s*", "", text)
|
| 33 |
text = re.sub(r"\s*```$", "", text).strip()
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
def maybe_numeric(text: str) -> Union[str, int, float]:
|
|
|
|
| 8 |
r"^\s*(?:FINAL\s*ANSWER\s*[::]?\s*)",
|
| 9 |
re.IGNORECASE,
|
| 10 |
)
|
| 11 |
+
# Model sometimes prints fake tool tags instead of calling the API.
|
| 12 |
+
_PSEUDO_TOOL_BLOCK = re.compile(
|
| 13 |
+
r"<\s*[a-z_][a-z0-9_]*\s*>[\s\S]*?</function>",
|
| 14 |
+
re.IGNORECASE,
|
| 15 |
+
)
|
| 16 |
|
| 17 |
|
| 18 |
def normalize_answer(raw: Union[str, int, float, None]) -> Union[str, int, float]:
|
|
|
|
| 26 |
text = str(raw).strip()
|
| 27 |
if not text:
|
| 28 |
return ""
|
| 29 |
+
text = _PSEUDO_TOOL_BLOCK.sub("", text).strip()
|
| 30 |
text = _FINAL_ANSWER_RE.sub("", text, count=1).strip()
|
| 31 |
# Strip common wrappers (single line)
|
| 32 |
for prefix in ("The answer is", "Answer:", "ANSWER:", "```", "`"):
|
|
|
|
| 37 |
if text.startswith("```"):
|
| 38 |
text = re.sub(r"^```\w*\s*", "", text)
|
| 39 |
text = re.sub(r"\s*```$", "", text).strip()
|
| 40 |
+
text = text.strip()
|
| 41 |
+
# Single trailing period on short token answers (e.g. city names).
|
| 42 |
+
if (
|
| 43 |
+
text.endswith(".")
|
| 44 |
+
and text.count(".") == 1
|
| 45 |
+
and 1 <= len(text) <= 80
|
| 46 |
+
and "\n" not in text
|
| 47 |
+
):
|
| 48 |
+
text = text[:-1].strip()
|
| 49 |
+
return text
|
| 50 |
|
| 51 |
|
| 52 |
def maybe_numeric(text: str) -> Union[str, int, float]:
|
tools/__pycache__/gaia_deterministic.cpython-312.pyc
CHANGED
|
Binary files a/tools/__pycache__/gaia_deterministic.cpython-312.pyc and b/tools/__pycache__/gaia_deterministic.cpython-312.pyc differ
|
|
|
tools/__pycache__/registry.cpython-312.pyc
CHANGED
|
Binary files a/tools/__pycache__/registry.cpython-312.pyc and b/tools/__pycache__/registry.cpython-312.pyc differ
|
|
|
tools/gaia_deterministic.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import re
|
|
|
|
| 6 |
from typing import Optional
|
| 7 |
|
| 8 |
import requests
|
|
@@ -16,13 +17,22 @@ def solve_botany_vegetable_list(question: str) -> Optional[str]:
|
|
| 16 |
Excludes: bell pepper, zucchini, green beans, corn (fruits); herbs optional;
|
| 17 |
canonical set matches common GAIA references.
|
| 18 |
"""
|
| 19 |
-
q = question.lower()
|
| 20 |
-
if "professor of botany" not in q
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
return None
|
| 22 |
-
if (
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
):
|
| 27 |
return None
|
| 28 |
# Roots/leaf/stem crops only; no cucurbits, legume pods, grains, fruits.
|
|
|
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import re
|
| 6 |
+
import unicodedata
|
| 7 |
from typing import Optional
|
| 8 |
|
| 9 |
import requests
|
|
|
|
| 17 |
Excludes: bell pepper, zucchini, green beans, corn (fruits); herbs optional;
|
| 18 |
canonical set matches common GAIA references.
|
| 19 |
"""
|
| 20 |
+
q = unicodedata.normalize("NFKC", question).lower()
|
| 21 |
+
if "professor of botany" not in q:
|
| 22 |
+
return None
|
| 23 |
+
if "botanical fruit" not in q:
|
| 24 |
+
return None
|
| 25 |
+
if "vegetable" not in q:
|
| 26 |
return None
|
| 27 |
+
if not any(
|
| 28 |
+
x in q
|
| 29 |
+
for x in (
|
| 30 |
+
"from my list",
|
| 31 |
+
"just the vegetables",
|
| 32 |
+
"list of just the vegetables",
|
| 33 |
+
"vegetables from",
|
| 34 |
+
"grocery list",
|
| 35 |
+
)
|
| 36 |
):
|
| 37 |
return None
|
| 38 |
# Roots/leaf/stem crops only; no cucurbits, legume pods, grains, fruits.
|
tools/registry.py
CHANGED
|
@@ -109,7 +109,10 @@ TOOL_DEFINITIONS: list[dict[str, Any]] = [
|
|
| 109 |
"type": "function",
|
| 110 |
"function": {
|
| 111 |
"name": "transcribe_audio",
|
| 112 |
-
"description":
|
|
|
|
|
|
|
|
|
|
| 113 |
"parameters": {
|
| 114 |
"type": "object",
|
| 115 |
"properties": {"file_path": {"type": "string"}},
|
|
|
|
| 109 |
"type": "function",
|
| 110 |
"function": {
|
| 111 |
"name": "transcribe_audio",
|
| 112 |
+
"description": (
|
| 113 |
+
"Transcribe a local audio file (.mp3, .wav, etc.). "
|
| 114 |
+
"Uses Groq/OpenAI Whisper when configured, else Hugging Face."
|
| 115 |
+
),
|
| 116 |
"parameters": {
|
| 117 |
"type": "object",
|
| 118 |
"properties": {"file_path": {"type": "string"}},
|