Spaces:
Sleeping
Sleeping
Fix prompts and utils
Browse files- agent.py +341 -37
- llm_client.py +57 -22
- prompts.py +40 -34
- tools.py +270 -19
- utils.py +277 -42
agent.py
CHANGED
|
@@ -1,74 +1,378 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
|
|
|
|
|
|
| 3 |
from dataclasses import dataclass
|
| 4 |
-
from
|
|
|
|
| 5 |
|
|
|
|
| 6 |
from prompts import build_solver_prompt
|
| 7 |
from tools import TaskFileTool
|
| 8 |
from utils import extract_final_answer, normalize_final_answer
|
| 9 |
-
from llm_client import HFLLMClient
|
| 10 |
|
| 11 |
|
| 12 |
@dataclass
|
| 13 |
class AgentConfig:
|
| 14 |
api_base_url: str = "https://agents-course-unit4-scoring.hf.space"
|
| 15 |
max_context_chars: int = 12000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
class SubmissionAgent:
|
| 19 |
-
"""
|
| 20 |
-
V1 agent for the Hugging Face Agents Course Unit 4 final project.
|
| 21 |
-
|
| 22 |
-
Goals:
|
| 23 |
-
- Accept a benchmark question and optional task_id
|
| 24 |
-
- Load attached task-file context when available
|
| 25 |
-
- Return ONLY the final answer string
|
| 26 |
-
- Stay framework-agnostic for now so we can plug in any LLM later
|
| 27 |
-
"""
|
| 28 |
def __init__(self, config: Optional[AgentConfig] = None, llm_client=None):
|
| 29 |
self.config = config or AgentConfig()
|
| 30 |
self.llm_client = llm_client or HFLLMClient()
|
| 31 |
self.task_file_tool = TaskFileTool(api_base_url=self.config.api_base_url)
|
| 32 |
|
| 33 |
def __call__(self, question: str, task_id: Optional[str] = None) -> str:
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
final_answer = extract_final_answer(raw_output)
|
| 40 |
-
return
|
| 41 |
|
| 42 |
-
def
|
| 43 |
-
"""
|
| 44 |
-
Try to fetch and read any task-linked file.
|
| 45 |
-
Safe fallback: empty context.
|
| 46 |
-
"""
|
| 47 |
if not task_id:
|
| 48 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
try:
|
| 51 |
-
|
| 52 |
-
if
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
|
|
|
|
| 56 |
except Exception:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
return ""
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
1)
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
"""
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
try:
|
| 71 |
return self.llm_client.generate(prompt)
|
| 72 |
except Exception as e:
|
| 73 |
-
print(f"LLM generation error: {e}")
|
| 74 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
+
import inspect
|
| 4 |
+
import re
|
| 5 |
from dataclasses import dataclass
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Callable, Optional, cast
|
| 8 |
|
| 9 |
+
from llm_client import HFLLMClient
|
| 10 |
from prompts import build_solver_prompt
|
| 11 |
from tools import TaskFileTool
|
| 12 |
from utils import extract_final_answer, normalize_final_answer
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
@dataclass
class AgentConfig:
    """Tunable settings consumed by ``SubmissionAgent``."""

    # Base URL of the scoring service; passed to TaskFileTool when the agent
    # is constructed.
    api_base_url: str = "https://agents-course-unit4-scoring.hf.space"
    # Maximum number of characters of extracted attachment text the agent keeps.
    max_context_chars: int = 12000
    # Maximum number of characters of attachment text embedded in one prompt.
    max_file_preview_chars: int = 4000
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@dataclass
class TaskArtifact:
    """Description of the file (if any) attached to a benchmark task."""

    # Identifier of the task, or None when the question came without one.
    task_id: Optional[str]
    # True when an attachment was downloaded for the task.
    exists: bool
    # Local path of the downloaded attachment, or None when there is none.
    file_path: Optional[Path]
    # Base name of the attachment; empty string when there is none.
    file_name: str
    # Lower-cased file extension including the dot; empty when there is none.
    suffix: str
    # Text extracted from the attachment; empty when nothing could be read.
    text_context: str
|
| 30 |
|
| 31 |
|
| 32 |
class SubmissionAgent:
    """Answer benchmark questions by routing them to a task-specific solver.

    A call loads whatever file is attached to the task, chooses a route from
    the attachment suffix or from keywords in the question, produces a raw
    answer (a local heuristic for reversed-text puzzles, the LLM otherwise)
    and finally extracts and normalizes the answer string.
    """

    # Attachment suffix -> route; consulted only when an attachment exists.
    _SUFFIX_ROUTES = {
        ".mp3": "audio",
        ".wav": "audio",
        ".m4a": "audio",
        ".flac": "audio",
        ".png": "image",
        ".jpg": "image",
        ".jpeg": "image",
        ".webp": "image",
        ".bmp": "image",
        ".xlsx": "spreadsheet",
        ".xls": "spreadsheet",
        ".csv": "spreadsheet",
        ".py": "code_file",
        ".txt": "text_file",
        ".md": "text_file",
        ".json": "text_file",
        ".html": "text_file",
        ".xml": "text_file",
    }

    # Instructions used for "general", "reverse_text" and any unknown route.
    _DEFAULT_INSTRUCTIONS = "Return ONLY the exact final answer with no explanation."

    # Route -> extra instructions appended to the prompt context.
    _ROUTE_INSTRUCTIONS = {
        "spreadsheet": (
            "This task appears to involve a spreadsheet or table file. "
            "Use any provided file preview carefully. "
            "Return ONLY the exact final answer with no explanation."
        ),
        "code_file": (
            "This task appears to involve attached Python code. "
            "Reason carefully over the provided code context if available. "
            "Return ONLY the exact final answer with no explanation."
        ),
        "audio": (
            "This task appears to involve audio. "
            "If no transcript is available in context, infer conservatively. "
            "Return ONLY the exact final answer with no explanation."
        ),
        "image": (
            "This task appears to involve an image or visual reasoning. "
            "Use any available context carefully and return ONLY the final answer."
        ),
        "video": (
            "This task appears to involve a video. "
            "Return ONLY the exact final answer with no explanation."
        ),
        "web_lookup": (
            "This task appears to require factual lookup or multi-hop retrieval. "
            "Return ONLY the exact final answer with no explanation."
        ),
        "text_file": (
            "Use the attached text file context carefully. "
            "Return ONLY the exact final answer with no explanation."
        ),
    }

    # Word -> opposite, used by the reversed-text heuristic.
    _OPPOSITES = {
        "left": "right",
        "right": "left",
        "up": "down",
        "down": "up",
        "true": "false",
        "false": "true",
        "yes": "no",
        "no": "yes",
        "hot": "cold",
        "cold": "hot",
        "open": "closed",
        "closed": "open",
        "in": "out",
        "out": "in",
        "before": "after",
        "after": "before",
    }

    # Direction words tried, in this order, when no quoted target word is usable.
    _DIRECTION_WORDS = ("left", "right", "up", "down")

    # Captures the quoted token in phrases such as: the word "left"
    _QUOTED_WORD_RE = re.compile(r'word\s+"([^"]+)"')

    def __init__(self, config: "Optional[AgentConfig]" = None, llm_client=None):
        """Create the agent.

        Args:
            config: Agent settings; a default ``AgentConfig`` is used when omitted.
            llm_client: Object exposing ``generate(prompt) -> str``; an
                ``HFLLMClient`` is created when omitted (or falsy).
        """
        self.config = config or AgentConfig()
        self.llm_client = llm_client or HFLLMClient()
        self.task_file_tool = TaskFileTool(api_base_url=self.config.api_base_url)

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Return the normalized final answer for ``question``.

        Args:
            question: The benchmark question text.
            task_id: Optional task identifier used to look up an attached file.
        """
        artifact = self._load_artifact(task_id=task_id)
        route = self._route(question=question, artifact=artifact)
        raw_output = self._dispatch(route=route, question=question, artifact=artifact)
        final_answer = extract_final_answer(raw_output)
        return self._normalize_answer(question=question, answer=final_answer)

    def _load_artifact(self, task_id: Optional[str]) -> "TaskArtifact":
        """Download and read the attachment for ``task_id`` on a best-effort basis.

        Any failure while downloading or reading is treated as "no file" /
        "no text" so that a broken attachment never prevents an answer.
        """
        if not task_id:
            return TaskArtifact(
                task_id=None,
                exists=False,
                file_path=None,
                file_name="",
                suffix="",
                text_context="",
            )

        file_path: Optional[Path] = None
        try:
            # Dynamic lookup: a tool without download_task_file simply yields
            # "no attachment" instead of raising AttributeError.
            download_fn = getattr(self.task_file_tool, "download_task_file", None)
            if callable(download_fn):
                typed_download_fn = cast(Callable[[str], Optional[Path]], download_fn)
                file_path = typed_download_fn(task_id)
        except Exception:
            # Deliberate best-effort: the question can still be answered.
            file_path = None

        try:
            text_context = self.task_file_tool.get_task_context(task_id=task_id) or ""
        except Exception:
            # Deliberate best-effort: fall back to an empty context.
            text_context = ""

        if text_context:
            text_context = text_context[: self.config.max_context_chars]

        return TaskArtifact(
            task_id=task_id,
            exists=file_path is not None,
            file_path=file_path,
            file_name=file_path.name if file_path else "",
            suffix=file_path.suffix.lower() if file_path else "",
            text_context=text_context,
        )

    def _route(self, question: str, artifact: "TaskArtifact") -> str:
        """Pick a route name for the question.

        The attachment suffix wins when it is recognised; otherwise keyword
        checks on the lower-cased question run in a fixed order and the first
        match is returned. ``"general"`` is the fallback.
        """
        q = (question or "").strip().lower()

        if artifact.exists:
            suffix_route = self._SUFFIX_ROUTES.get(artifact.suffix)
            if suffix_route:
                return suffix_route

        if self._looks_like_reversed_text(q):
            return "reverse_text"

        if any(key in q for key in ("youtube.com", "youtu.be", "video ")):
            return "video"

        if any(key in q for key in ("wikipedia", "published by", "article", "paper")):
            return "web_lookup"

        # Chess positions are supplied as board images.
        if "algebraic notation" in q and "chess" in q:
            return "image"

        if any(key in q for key in ("audio recording", "voice memo", "listen to")):
            return "audio"

        if "excel file" in q or "spreadsheet" in q:
            return "spreadsheet"

        if "final numeric output from the attached python code" in q:
            return "code_file"

        return "general"

    def _dispatch(self, route: str, question: str, artifact: "TaskArtifact") -> str:
        """Produce the raw (un-normalized) answer for the chosen route.

        Reversed-text puzzles are first tried with the local heuristic; every
        other case, and a heuristic miss, is delegated to the LLM with the
        instructions registered for the route.
        """
        if route == "reverse_text":
            answer = self._solve_reverse_text(question)
            if answer:
                return answer

        return self._solve_with_llm(
            question=question,
            artifact=artifact,
            route=route,
            extra_instructions=self._ROUTE_INSTRUCTIONS.get(
                route, self._DEFAULT_INSTRUCTIONS
            ),
        )

    def _solve_reverse_text(self, question: str) -> str:
        """Answer "write the opposite of the word X" puzzles written backwards.

        Returns the opposite word, or an empty string when the reversed
        question does not look like English or no known word is found.
        """
        raw = (question or "").strip()
        if not raw:
            return ""

        reversed_question = raw[::-1]
        if not self._looks_english_like(reversed_question):
            return ""

        rq = reversed_question.lower()
        if "opposite" not in rq:
            return ""

        quoted = self._QUOTED_WORD_RE.search(rq)
        if quoted:
            opposite = self._simple_opposite_word(quoted.group(1))
            if opposite:
                return opposite

        # Whole-word match only: a plain substring test would see "up" inside
        # words such as "suppose" and produce a bogus answer.
        for direction in self._DIRECTION_WORDS:
            if re.search(rf"\b{direction}\b", rq):
                return self._simple_opposite_word(direction)

        return ""

    def _solve_with_llm(
        self,
        question: str,
        artifact: "TaskArtifact",
        route: str,
        extra_instructions: str = "",
    ) -> str:
        """Build the prompt and query the LLM; return "" if generation fails."""
        prompt = self._build_prompt(
            question=question,
            artifact=artifact,
            route=route,
            extra_instructions=extra_instructions,
        )

        try:
            return self.llm_client.generate(prompt)
        except Exception as e:
            # Best-effort: report the failure and let the caller submit "".
            print(f"LLM generation error on route '{route}': {e}")
            return ""

    def _build_prompt(
        self,
        question: str,
        artifact: "TaskArtifact",
        route: str,
        extra_instructions: str = "",
    ) -> str:
        """Assemble labelled context sections and wrap them with the solver prompt."""
        parts = []

        if artifact.exists:
            parts.append(f"[Attached file name]\n{artifact.file_name or 'unknown'}")
            parts.append(f"[Attached file suffix]\n{artifact.suffix or 'unknown'}")

        if route:
            parts.append(f"[Detected task type]\n{route}")

        if artifact.text_context:
            preview = artifact.text_context[: self.config.max_file_preview_chars]
            parts.append(f"[Attached file extracted context]\n{preview}")

        if extra_instructions:
            parts.append(f"[Important instructions]\n{extra_instructions}")

        merged_context = "\n\n".join(parts).strip()

        try:
            return build_solver_prompt(question=question, context=merged_context)
        except TypeError:
            # Tolerate a prompt builder that uses different parameter names.
            return build_solver_prompt(question, merged_context)

    def _normalize_answer(self, question: str, answer: str) -> str:
        """Normalize ``answer`` with whichever ``normalize_final_answer`` is installed.

        A one-parameter normalizer receives only the answer; otherwise the
        ``(question, answer)`` form is used. If the call is rejected with a
        ``TypeError`` the stripped answer is returned unchanged.
        """
        try:
            arity = len(inspect.signature(normalize_final_answer).parameters)
        except (TypeError, ValueError):
            # Signature not introspectable: assume the two-argument form.
            arity = 2

        try:
            if arity == 1:
                return normalize_final_answer(answer)
            return normalize_final_answer(question, answer)
        except TypeError:
            return answer.strip() if answer else ""

    @staticmethod
    def _looks_like_reversed_text(text: str) -> bool:
        """Heuristically detect a question that was written backwards.

        ``text`` is expected to be lower-cased already (markers are lower-case).
        """
        if not text:
            return False

        reversed_markers = [
            "uoy fi",
            "dnatsrednu",
            "rewsna",
            "etirw",
            "tfel",
        ]
        if any(marker in text for marker in reversed_markers):
            return True

        # A sentence that starts with its full stop and never uses "the".
        if text.startswith(".") and " the " not in f" {text} ":
            return True

        return False

    @staticmethod
    def _looks_english_like(text: str) -> bool:
        """Return True when at least two common English words occur in ``text``."""
        if not text:
            return False

        common_words = [
            " the ",
            " and ",
            " if ",
            " you ",
            " answer ",
            " write ",
            " word ",
            " opposite ",
        ]
        # Pad so that words at either end of the text can match as well.
        padded = f" {text.lower()} "
        hits = sum(1 for w in common_words if w in padded)
        return hits >= 2

    @staticmethod
    def _simple_opposite_word(word: str) -> str:
        """Return the known opposite of ``word`` (case-insensitive) or ""."""
        return SubmissionAgent._OPPOSITES.get(word.strip().lower(), "")
|
llm_client.py
CHANGED
|
@@ -1,55 +1,90 @@
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
-
from
|
|
|
|
| 3 |
from dotenv import load_dotenv
|
|
|
|
| 4 |
|
| 5 |
load_dotenv()
|
| 6 |
|
| 7 |
|
| 8 |
class HFLLMClient:
|
| 9 |
-
def __init__(self):
|
| 10 |
self.api_key = os.getenv("HF_TOKEN")
|
| 11 |
print("HF token present:", bool(self.api_key))
|
| 12 |
|
| 13 |
if not self.api_key:
|
| 14 |
raise ValueError("HF_TOKEN is not set")
|
| 15 |
|
| 16 |
-
self.model = "Qwen/Qwen2.5-7B-Instruct"
|
|
|
|
|
|
|
|
|
|
| 17 |
self.client = InferenceClient(
|
| 18 |
provider="auto",
|
| 19 |
api_key=self.api_key,
|
| 20 |
)
|
| 21 |
|
| 22 |
def generate(self, prompt: str) -> str:
|
|
|
|
|
|
|
|
|
|
| 23 |
try:
|
| 24 |
output = self.client.chat_completion(
|
| 25 |
model=self.model,
|
| 26 |
messages=[
|
| 27 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
],
|
| 29 |
-
max_tokens=
|
| 30 |
-
temperature=
|
| 31 |
)
|
| 32 |
|
| 33 |
-
text =
|
| 34 |
-
print("LLM response preview:",
|
| 35 |
-
return
|
| 36 |
|
| 37 |
except Exception as e:
|
| 38 |
-
raise ValueError(f"Inference call failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
| 43 |
|
| 44 |
-
|
|
|
|
| 45 |
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
-
# # Keep this as a lightweight optional fallback.
|
| 53 |
-
# # If you later connect a provider, do it here.
|
| 54 |
-
# # For now, fail cleanly so tool-based paths still work.
|
| 55 |
-
# raise RuntimeError("No free LLM fallback configured.")
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
import os
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
+
from huggingface_hub import InferenceClient
|
| 8 |
|
| 9 |
load_dotenv()
|
| 10 |
|
| 11 |
|
| 12 |
class HFLLMClient:
    """Small wrapper around the Hugging Face chat-completion inference client."""

    def __init__(self) -> None:
        """Read the token and generation settings from environment variables.

        Reads HF_TOKEN (required), HF_MODEL, HF_MAX_TOKENS and HF_TEMPERATURE.

        Raises:
            ValueError: if HF_TOKEN is missing or empty, or if HF_MAX_TOKENS /
                HF_TEMPERATURE cannot be parsed as a number.
        """
        token = os.getenv("HF_TOKEN")
        self.api_key = token
        print("HF token present:", bool(token))

        if not token:
            raise ValueError("HF_TOKEN is not set")

        env = os.getenv
        self.model = env("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
        self.max_tokens = int(env("HF_MAX_TOKENS", "128"))
        self.temperature = float(env("HF_TEMPERATURE", "0.1"))

        self.client = InferenceClient(provider="auto", api_key=self.api_key)

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` as the user message and return the reply text.

        Raises:
            ValueError: wrapping any exception raised during the request.
        """
        system_message = {
            "role": "system",
            "content": (
                "You are an exact-match benchmark solver. "
                "Return only the final answer with no explanation."
            ),
        }
        user_message = {"role": "user", "content": prompt}

        try:
            completion = self.client.chat_completion(
                model=self.model,
                messages=[system_message, user_message],
                max_tokens=self.max_tokens,
                temperature=self.temperature,
            )
            reply = self._extract_text(completion)
            print("LLM response preview:", reply[:300])
            return reply
        except Exception as e:
            raise ValueError(f"Inference call failed: {e}") from e

    @staticmethod
    def _extract_text(output: Any) -> str:
        """Pull the message text out of a chat-completion response.

        Returns an empty string when the response is None, lacks
        ``choices[0].message.content``, or that content is None. List content
        is flattened into a single space-separated string.
        """
        if output is None:
            return ""

        try:
            content = output.choices[0].message.content
        except Exception:
            # Unexpected response shape: treat it as "no text".
            return ""

        if content is None:
            return ""
        if isinstance(content, str):
            return content.strip()
        if not isinstance(content, list):
            return str(content).strip()

        fragments = []
        for entry in content:
            if isinstance(entry, dict):
                value = entry.get("text") or entry.get("content") or ""
                if value:
                    fragments.append(str(value))
                continue
            if entry is not None:
                fragments.append(str(entry))
        return " ".join(fragments).strip()
|
|
|
|
|
|
|
|
|
|
|
|
prompts.py
CHANGED
|
@@ -2,56 +2,62 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
|
| 4 |
SYSTEM_PROMPT = """
|
| 5 |
-
You are a benchmark-solving AI agent.
|
| 6 |
-
|
| 7 |
-
Your
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
- Return
|
| 11 |
-
-
|
| 12 |
-
- Do
|
| 13 |
-
- Do
|
| 14 |
-
-
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
- If the
|
| 20 |
-
-
|
| 21 |
-
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
|
| 28 |
def build_solver_prompt(question: str, context: str = "") -> str:
|
| 29 |
"""
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
"""
|
| 33 |
|
| 34 |
-
if context:
|
| 35 |
-
|
| 36 |
{SYSTEM_PROMPT}
|
| 37 |
|
| 38 |
-
|
| 39 |
{context}
|
| 40 |
|
| 41 |
Question:
|
| 42 |
{question}
|
| 43 |
|
| 44 |
-
Return only the final answer.
|
| 45 |
-
"""
|
| 46 |
-
|
| 47 |
-
|
| 48 |
{SYSTEM_PROMPT}
|
| 49 |
|
| 50 |
Question:
|
| 51 |
{question}
|
| 52 |
|
| 53 |
-
Return only the final answer.
|
| 54 |
-
"""
|
| 55 |
-
|
| 56 |
-
return prompt.strip()
|
| 57 |
-
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
SYSTEM_PROMPT = """
You are a benchmark-solving AI agent for exact-match evaluation.

Your job is to produce the single best final answer for the given question.

Core rules:
- Return ONLY the final answer.
- Do NOT explain your reasoning.
- Do NOT include analysis, notes, labels, or extra words.
- Do NOT say things like "Final answer:" or "The answer is".
- If context is provided, use it carefully.
- If the task mentions a strict output format, follow it exactly.
- If the question asks for only part of a name, return only that requested part.
- If the question asks for a list, return only the list.
- If the question asks for sorting, alphabetizing, or ascending order, obey it exactly.
- If the question asks for a code, abbreviation, city, surname, first name, or numeric value only, return only that.
- Do not invent unsupported facts.
- Prefer precision over verbosity.

Exact-match formatting rules:
- Numbers: output only the number.
- Dates: output only the requested date string.
- Names: output only the requested portion of the name.
- Lists: output only the list items in the requested delimiter format.
- Sentences: output a full sentence only if the answer itself must be a sentence.
- Punctuation: do not add extra punctuation unless required by the answer.
"""


def build_solver_prompt(question: str, context: str = "") -> str:
    """
    Compose the prompt text that is sent to the model.

    The result is SYSTEM_PROMPT, then an "Available context:" section (only
    when ``context`` contains non-whitespace text), then the question and a
    closing reminder; leading and trailing whitespace is stripped.
    """
    has_context = bool(context and context.strip())
    # The context is inserted exactly as given; only the emptiness test strips it.
    context_section = f"Available context:\n{context}\n\n" if has_context else ""

    prompt = (
        f"\n{SYSTEM_PROMPT}\n\n"
        f"{context_section}"
        f"Question:\n{question}\n\n"
        "Return only the exact final answer.\n"
    )
    return prompt.strip()
|
|
|
|
|
|
|
|
|
tools.py
CHANGED
|
@@ -1,35 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
-
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
from pathlib import Path
|
| 6 |
from typing import Optional
|
|
|
|
| 7 |
import pandas as pd
|
| 8 |
import requests
|
| 9 |
|
|
|
|
| 10 |
class TaskFileTool:
|
| 11 |
"""
|
| 12 |
-
Downloads and reads task-linked files from the Hugging Face
|
| 13 |
-
Unit 4 scoring API.
|
| 14 |
|
| 15 |
-
Supported text
|
| 16 |
- txt
|
| 17 |
- csv
|
| 18 |
- json
|
| 19 |
- md
|
| 20 |
- html
|
| 21 |
- xml
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
For unsupported
|
| 24 |
-
|
|
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
-
def __init__(self, api_base_url: str, cache_dir:str = "task_files", timeout: int =30):
|
| 28 |
self.api_base_url = api_base_url.rstrip("/")
|
| 29 |
self.cache_dir = Path(cache_dir)
|
| 30 |
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
| 31 |
self.timeout = timeout
|
| 32 |
-
|
| 33 |
def get_task_context(self, task_id: str) -> str:
|
| 34 |
"""
|
| 35 |
Main entry point used by the agent:
|
|
@@ -49,16 +263,16 @@ class TaskFileTool:
|
|
| 49 |
Returns:
|
| 50 |
Path to saved file if successful, else None
|
| 51 |
"""
|
| 52 |
-
url = f"{self.api_base_url}/
|
| 53 |
|
| 54 |
try:
|
| 55 |
response = requests.get(url, timeout=self.timeout)
|
| 56 |
except requests.RequestException:
|
| 57 |
return None
|
| 58 |
-
|
| 59 |
-
if response.status_code !=200:
|
| 60 |
return None
|
| 61 |
-
|
| 62 |
filename = self._infer_filename(response=response, task_id=task_id)
|
| 63 |
file_path = self.cache_dir / filename
|
| 64 |
|
|
@@ -68,19 +282,20 @@ class TaskFileTool:
|
|
| 68 |
return file_path
|
| 69 |
except OSError:
|
| 70 |
return None
|
| 71 |
-
|
| 72 |
-
|
| 73 |
def read_file_as_text(self, file_path: Path) -> str:
|
| 74 |
"""
|
| 75 |
-
Reads supported file types into plain text.
|
| 76 |
"""
|
| 77 |
suffix = file_path.suffix.lower()
|
| 78 |
|
| 79 |
try:
|
| 80 |
-
if suffix in {".txt", ".md", ".html", ".xml", ".csv", ".json"}:
|
| 81 |
return self._read_supported_text_file(file_path, suffix)
|
| 82 |
|
| 83 |
-
|
|
|
|
|
|
|
| 84 |
if suffix == "":
|
| 85 |
return self._read_extensionless_file(file_path)
|
| 86 |
|
|
@@ -89,7 +304,7 @@ class TaskFileTool:
|
|
| 89 |
return ""
|
| 90 |
|
| 91 |
def _read_supported_text_file(self, file_path: Path, suffix: str) -> str:
|
| 92 |
-
if suffix in {".txt", ".md", ".html", ".xml"}:
|
| 93 |
return file_path.read_text(encoding="utf-8", errors="ignore")
|
| 94 |
|
| 95 |
if suffix == ".json":
|
|
@@ -109,6 +324,27 @@ class TaskFileTool:
|
|
| 109 |
|
| 110 |
return ""
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
def _read_extensionless_file(self, file_path: Path) -> str:
|
| 113 |
"""
|
| 114 |
Try to interpret extensionless files as utf-8 text first.
|
|
@@ -166,6 +402,21 @@ class TaskFileTool:
|
|
| 166 |
"text/html": ".html",
|
| 167 |
"application/xml": ".xml",
|
| 168 |
"text/xml": ".xml",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
}
|
| 170 |
|
| 171 |
for key, ext in mapping.items():
|
|
@@ -179,4 +430,4 @@ class TaskFileTool:
|
|
| 179 |
"""
|
| 180 |
Prevent path traversal and weird path issues.
|
| 181 |
"""
|
| 182 |
-
return os.path.basename(filename)
|
|
|
|
| 1 |
+
# from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
# import json
|
| 4 |
+
# import os
|
| 5 |
+
# from dataclasses import dataclass
|
| 6 |
+
# from pathlib import Path
|
| 7 |
+
# from typing import Optional
|
| 8 |
+
|
| 9 |
+
# import pandas as pd
|
| 10 |
+
# import requests
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# @dataclass
|
| 14 |
+
# class TaskFileInfo:
|
| 15 |
+
# task_id: str
|
| 16 |
+
# exists: bool
|
| 17 |
+
# url: Optional[str]
|
| 18 |
+
# file_path: Optional[Path]
|
| 19 |
+
# file_name: Optional[str]
|
| 20 |
+
# suffix: str
|
| 21 |
+
# content_type: str
|
| 22 |
+
# extracted_text: str
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# class TaskFileTool:
|
| 26 |
+
# def __init__(self, api_base_url: str, cache_dir: str = "task_files", timeout: int = 30):
|
| 27 |
+
# self.api_base_url = api_base_url.rstrip("/")
|
| 28 |
+
# self.cache_dir = Path(cache_dir)
|
| 29 |
+
# self.cache_dir.mkdir(parents=True, exist_ok=True)
|
| 30 |
+
# self.timeout = timeout
|
| 31 |
+
|
| 32 |
+
# def inspect_task_file(self, task_id: str) -> TaskFileInfo:
|
| 33 |
+
# url = f"{self.api_base_url}/files/{task_id}"
|
| 34 |
+
|
| 35 |
+
# try:
|
| 36 |
+
# response = requests.get(url, timeout=self.timeout)
|
| 37 |
+
# except requests.RequestException:
|
| 38 |
+
# return TaskFileInfo(
|
| 39 |
+
# task_id=task_id,
|
| 40 |
+
# exists=False,
|
| 41 |
+
# url=url,
|
| 42 |
+
# file_path=None,
|
| 43 |
+
# file_name=None,
|
| 44 |
+
# suffix="",
|
| 45 |
+
# content_type="",
|
| 46 |
+
# extracted_text="",
|
| 47 |
+
# )
|
| 48 |
+
|
| 49 |
+
# if response.status_code != 200:
|
| 50 |
+
# return TaskFileInfo(
|
| 51 |
+
# task_id=task_id,
|
| 52 |
+
# exists=False,
|
| 53 |
+
# url=url,
|
| 54 |
+
# file_path=None,
|
| 55 |
+
# file_name=None,
|
| 56 |
+
# suffix="",
|
| 57 |
+
# content_type=response.headers.get("content-type", ""),
|
| 58 |
+
# extracted_text="",
|
| 59 |
+
# )
|
| 60 |
+
|
| 61 |
+
# filename = self._infer_filename(response=response, task_id=task_id)
|
| 62 |
+
# file_path = self.cache_dir / filename
|
| 63 |
+
# content_type = response.headers.get("content-type", "").lower()
|
| 64 |
+
|
| 65 |
+
# try:
|
| 66 |
+
# with open(file_path, "wb") as f:
|
| 67 |
+
# f.write(response.content)
|
| 68 |
+
# except OSError:
|
| 69 |
+
# return TaskFileInfo(
|
| 70 |
+
# task_id=task_id,
|
| 71 |
+
# exists=False,
|
| 72 |
+
# url=url,
|
| 73 |
+
# file_path=None,
|
| 74 |
+
# file_name=filename,
|
| 75 |
+
# suffix=Path(filename).suffix.lower(),
|
| 76 |
+
# content_type=content_type,
|
| 77 |
+
# extracted_text="",
|
| 78 |
+
# )
|
| 79 |
+
|
| 80 |
+
# extracted_text = self.read_file_as_text(file_path)
|
| 81 |
+
|
| 82 |
+
# return TaskFileInfo(
|
| 83 |
+
# task_id=task_id,
|
| 84 |
+
# exists=True,
|
| 85 |
+
# url=url,
|
| 86 |
+
# file_path=file_path,
|
| 87 |
+
# file_name=file_path.name,
|
| 88 |
+
# suffix=file_path.suffix.lower(),
|
| 89 |
+
# content_type=content_type,
|
| 90 |
+
# extracted_text=extracted_text,
|
| 91 |
+
# )
|
| 92 |
+
|
| 93 |
+
# def get_task_context(self, task_id: str) -> str:
|
| 94 |
+
# info = self.inspect_task_file(task_id)
|
| 95 |
+
# return info.extracted_text
|
| 96 |
+
|
| 97 |
+
# def read_file_as_text(self, file_path: Path) -> str:
|
| 98 |
+
# suffix = file_path.suffix.lower()
|
| 99 |
+
|
| 100 |
+
# try:
|
| 101 |
+
# if suffix in {".txt", ".md", ".html", ".xml", ".json", ".csv", ".py"}:
|
| 102 |
+
# return self._read_supported_text_file(file_path, suffix)
|
| 103 |
+
|
| 104 |
+
# if suffix in {".xlsx", ".xls"}:
|
| 105 |
+
# return self._read_excel_preview(file_path)
|
| 106 |
+
|
| 107 |
+
# if suffix == "":
|
| 108 |
+
# return self._read_extensionless_file(file_path)
|
| 109 |
+
|
| 110 |
+
# return ""
|
| 111 |
+
# except Exception:
|
| 112 |
+
# return ""
|
| 113 |
+
|
| 114 |
+
# def _read_supported_text_file(self, file_path: Path, suffix: str) -> str:
|
| 115 |
+
# if suffix in {".txt", ".md", ".html", ".xml", ".py"}:
|
| 116 |
+
# return file_path.read_text(encoding="utf-8", errors="ignore")
|
| 117 |
+
|
| 118 |
+
# if suffix == ".json":
|
| 119 |
+
# raw = file_path.read_text(encoding="utf-8", errors="ignore")
|
| 120 |
+
# try:
|
| 121 |
+
# parsed = json.loads(raw)
|
| 122 |
+
# return json.dumps(parsed, indent=2, ensure_ascii=False)
|
| 123 |
+
# except json.JSONDecodeError:
|
| 124 |
+
# return raw
|
| 125 |
+
|
| 126 |
+
# if suffix == ".csv":
|
| 127 |
+
# try:
|
| 128 |
+
# df = pd.read_csv(file_path)
|
| 129 |
+
# return df.to_csv(index=False)
|
| 130 |
+
# except Exception:
|
| 131 |
+
# return file_path.read_text(encoding="utf-8", errors="ignore")
|
| 132 |
+
|
| 133 |
+
# return ""
|
| 134 |
+
|
| 135 |
+
# def _read_excel_preview(self, file_path: Path) -> str:
|
| 136 |
+
# try:
|
| 137 |
+
# xls = pd.ExcelFile(file_path)
|
| 138 |
+
# chunks = []
|
| 139 |
+
# for sheet_name in xls.sheet_names[:5]:
|
| 140 |
+
# df = pd.read_excel(file_path, sheet_name=sheet_name)
|
| 141 |
+
# chunks.append(f"Sheet: {sheet_name}")
|
| 142 |
+
# chunks.append(df.head(20).to_csv(index=False))
|
| 143 |
+
# return "\n\n".join(chunks)
|
| 144 |
+
# except Exception:
|
| 145 |
+
# return ""
|
| 146 |
+
|
| 147 |
+
# def _read_extensionless_file(self, file_path: Path) -> str:
|
| 148 |
+
# try:
|
| 149 |
+
# raw = file_path.read_text(encoding="utf-8", errors="ignore")
|
| 150 |
+
# if raw.strip():
|
| 151 |
+
# return raw
|
| 152 |
+
# except Exception:
|
| 153 |
+
# pass
|
| 154 |
+
# return ""
|
| 155 |
+
|
| 156 |
+
# def _infer_filename(self, response: requests.Response, task_id: str) -> str:
|
| 157 |
+
# content_disposition = response.headers.get("content-disposition", "")
|
| 158 |
+
# filename = self._extract_filename_from_content_disposition(content_disposition)
|
| 159 |
+
|
| 160 |
+
# if filename:
|
| 161 |
+
# return self._safe_filename(filename)
|
| 162 |
+
|
| 163 |
+
# content_type = response.headers.get("content-type", "").lower()
|
| 164 |
+
# extension = self._extension_from_content_type(content_type)
|
| 165 |
+
|
| 166 |
+
# if extension:
|
| 167 |
+
# return f"{task_id}{extension}"
|
| 168 |
+
|
| 169 |
+
# return str(task_id)
|
| 170 |
+
|
| 171 |
+
# @staticmethod
|
| 172 |
+
# def _extract_filename_from_content_disposition(content_disposition: str) -> Optional[str]:
|
| 173 |
+
# if "filename=" not in content_disposition:
|
| 174 |
+
# return None
|
| 175 |
+
# try:
|
| 176 |
+
# filename = content_disposition.split("filename=")[-1].strip().strip('"')
|
| 177 |
+
# return filename or None
|
| 178 |
+
# except Exception:
|
| 179 |
+
# return None
|
| 180 |
+
|
| 181 |
+
# @staticmethod
|
| 182 |
+
# def _extension_from_content_type(content_type: str) -> str:
|
| 183 |
+
# mapping = {
|
| 184 |
+
# "text/plain": ".txt",
|
| 185 |
+
# "text/csv": ".csv",
|
| 186 |
+
# "application/csv": ".csv",
|
| 187 |
+
# "application/json": ".json",
|
| 188 |
+
# "text/markdown": ".md",
|
| 189 |
+
# "text/html": ".html",
|
| 190 |
+
# "application/xml": ".xml",
|
| 191 |
+
# "text/xml": ".xml",
|
| 192 |
+
# "application/pdf": ".pdf",
|
| 193 |
+
# "image/png": ".png",
|
| 194 |
+
# "image/jpeg": ".jpg",
|
| 195 |
+
# "audio/mpeg": ".mp3",
|
| 196 |
+
# "audio/wav": ".wav",
|
| 197 |
+
# "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
| 198 |
+
# "application/vnd.ms-excel": ".xls",
|
| 199 |
+
# "text/x-python": ".py",
|
| 200 |
+
# }
|
| 201 |
+
|
| 202 |
+
# for key, ext in mapping.items():
|
| 203 |
+
# if key in content_type:
|
| 204 |
+
# return ext
|
| 205 |
+
# return ""
|
| 206 |
+
|
| 207 |
+
# @staticmethod
|
| 208 |
+
# def _safe_filename(filename: str) -> str:
|
| 209 |
+
# return os.path.basename(filename)
|
| 210 |
+
|
| 211 |
from __future__ import annotations
|
| 212 |
+
|
| 213 |
import json
|
| 214 |
import os
|
| 215 |
from pathlib import Path
|
| 216 |
from typing import Optional
|
| 217 |
+
|
| 218 |
import pandas as pd
|
| 219 |
import requests
|
| 220 |
|
| 221 |
+
|
| 222 |
class TaskFileTool:
|
| 223 |
"""
|
| 224 |
+
Downloads and reads task-linked files from the Hugging Face Unit 4 scoring API.
|
|
|
|
| 225 |
|
| 226 |
+
Supported text extraction / preview:
|
| 227 |
- txt
|
| 228 |
- csv
|
| 229 |
- json
|
| 230 |
- md
|
| 231 |
- html
|
| 232 |
- xml
|
| 233 |
+
- py
|
| 234 |
+
- xlsx / xls (preview)
|
| 235 |
|
| 236 |
+
For unsupported binary files, get_task_context() safely returns an empty string,
|
| 237 |
+
while download_task_file() still returns the local file path so the agent can route
|
| 238 |
+
by file suffix.
|
| 239 |
"""
|
| 240 |
|
| 241 |
+
def __init__(self, api_base_url: str, cache_dir: str = "task_files", timeout: int = 30):
|
| 242 |
self.api_base_url = api_base_url.rstrip("/")
|
| 243 |
self.cache_dir = Path(cache_dir)
|
| 244 |
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
| 245 |
self.timeout = timeout
|
| 246 |
+
|
| 247 |
def get_task_context(self, task_id: str) -> str:
|
| 248 |
"""
|
| 249 |
Main entry point used by the agent:
|
|
|
|
| 263 |
Returns:
|
| 264 |
Path to saved file if successful, else None
|
| 265 |
"""
|
| 266 |
+
url = f"{self.api_base_url}/files/{task_id}"
|
| 267 |
|
| 268 |
try:
|
| 269 |
response = requests.get(url, timeout=self.timeout)
|
| 270 |
except requests.RequestException:
|
| 271 |
return None
|
| 272 |
+
|
| 273 |
+
if response.status_code != 200:
|
| 274 |
return None
|
| 275 |
+
|
| 276 |
filename = self._infer_filename(response=response, task_id=task_id)
|
| 277 |
file_path = self.cache_dir / filename
|
| 278 |
|
|
|
|
| 282 |
return file_path
|
| 283 |
except OSError:
|
| 284 |
return None
|
| 285 |
+
|
|
|
|
| 286 |
def read_file_as_text(self, file_path: Path) -> str:
|
| 287 |
"""
|
| 288 |
+
Reads supported file types into plain text or lightweight preview text.
|
| 289 |
"""
|
| 290 |
suffix = file_path.suffix.lower()
|
| 291 |
|
| 292 |
try:
|
| 293 |
+
if suffix in {".txt", ".md", ".html", ".xml", ".csv", ".json", ".py"}:
|
| 294 |
return self._read_supported_text_file(file_path, suffix)
|
| 295 |
|
| 296 |
+
if suffix in {".xlsx", ".xls"}:
|
| 297 |
+
return self._read_excel_preview(file_path)
|
| 298 |
+
|
| 299 |
if suffix == "":
|
| 300 |
return self._read_extensionless_file(file_path)
|
| 301 |
|
|
|
|
| 304 |
return ""
|
| 305 |
|
| 306 |
def _read_supported_text_file(self, file_path: Path, suffix: str) -> str:
|
| 307 |
+
if suffix in {".txt", ".md", ".html", ".xml", ".py"}:
|
| 308 |
return file_path.read_text(encoding="utf-8", errors="ignore")
|
| 309 |
|
| 310 |
if suffix == ".json":
|
|
|
|
| 324 |
|
| 325 |
return ""
|
| 326 |
|
| 327 |
+
def _read_excel_preview(self, file_path: Path) -> str:
|
| 328 |
+
"""
|
| 329 |
+
Read a small preview of Excel sheets into text so the LLM has something useful.
|
| 330 |
+
This is not a full spreadsheet solver, just a context preview.
|
| 331 |
+
"""
|
| 332 |
+
try:
|
| 333 |
+
xls = pd.ExcelFile(file_path)
|
| 334 |
+
chunks: list[str] = []
|
| 335 |
+
|
| 336 |
+
for sheet_name in xls.sheet_names[:5]:
|
| 337 |
+
try:
|
| 338 |
+
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
| 339 |
+
chunks.append(f"Sheet: {sheet_name}")
|
| 340 |
+
chunks.append(df.head(20).to_csv(index=False))
|
| 341 |
+
except Exception:
|
| 342 |
+
continue
|
| 343 |
+
|
| 344 |
+
return "\n\n".join(chunks).strip()
|
| 345 |
+
except Exception:
|
| 346 |
+
return ""
|
| 347 |
+
|
| 348 |
def _read_extensionless_file(self, file_path: Path) -> str:
|
| 349 |
"""
|
| 350 |
Try to interpret extensionless files as utf-8 text first.
|
|
|
|
| 402 |
"text/html": ".html",
|
| 403 |
"application/xml": ".xml",
|
| 404 |
"text/xml": ".xml",
|
| 405 |
+
"application/pdf": ".pdf",
|
| 406 |
+
"image/png": ".png",
|
| 407 |
+
"image/jpeg": ".jpg",
|
| 408 |
+
"image/jpg": ".jpg",
|
| 409 |
+
"image/webp": ".webp",
|
| 410 |
+
"audio/mpeg": ".mp3",
|
| 411 |
+
"audio/mp3": ".mp3",
|
| 412 |
+
"audio/wav": ".wav",
|
| 413 |
+
"audio/x-wav": ".wav",
|
| 414 |
+
"audio/mp4": ".m4a",
|
| 415 |
+
"audio/x-m4a": ".m4a",
|
| 416 |
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
| 417 |
+
"application/vnd.ms-excel": ".xls",
|
| 418 |
+
"text/x-python": ".py",
|
| 419 |
+
"text/python": ".py",
|
| 420 |
}
|
| 421 |
|
| 422 |
for key, ext in mapping.items():
|
|
|
|
| 430 |
"""
|
| 431 |
Prevent path traversal and weird path issues.
|
| 432 |
"""
|
| 433 |
+
return os.path.basename(filename)
|
utils.py
CHANGED
|
@@ -1,16 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import re
|
| 4 |
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def extract_final_answer(text: str) -> str:
|
| 7 |
"""
|
| 8 |
Extract the most likely final answer from raw model output.
|
| 9 |
|
| 10 |
-
|
| 11 |
-
-
|
| 12 |
-
-
|
| 13 |
-
- if
|
| 14 |
"""
|
| 15 |
if text is None:
|
| 16 |
return ""
|
|
@@ -19,43 +168,39 @@ def extract_final_answer(text: str) -> str:
|
|
| 19 |
if not text:
|
| 20 |
return ""
|
| 21 |
|
| 22 |
-
# Remove fenced code blocks if the model wraps the answer oddly
|
| 23 |
text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
|
| 24 |
text = re.sub(r"\s*```$", "", text)
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
r"(?
|
| 29 |
-
r"(?
|
| 30 |
-
r"(?
|
| 31 |
-
r"(?i)\bthe answer is\s+",
|
| 32 |
]
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
|
| 40 |
if not lines:
|
| 41 |
return ""
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
|
| 46 |
return lines[-1]
|
| 47 |
|
| 48 |
|
| 49 |
-
def
|
| 50 |
"""
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
Rules:
|
| 54 |
-
- trim outer whitespace
|
| 55 |
-
- collapse internal repeated whitespace
|
| 56 |
-
- remove wrapping quotes if they wrap the full answer
|
| 57 |
-
- remove a single trailing period only for plain word/phrase answers
|
| 58 |
-
but keep decimal numbers and date punctuation intact
|
| 59 |
"""
|
| 60 |
if text is None:
|
| 61 |
return ""
|
|
@@ -64,37 +209,127 @@ def normalize_final_answer(text: str) -> str:
|
|
| 64 |
if not text:
|
| 65 |
return ""
|
| 66 |
|
| 67 |
-
# Collapse repeated whitespace
|
| 68 |
text = re.sub(r"\s+", " ", text).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
|
| 71 |
-
if len(text) >= 2:
|
| 72 |
-
if (text[0] == text[-1]) and text[0] in {'"', "'"}:
|
| 73 |
-
text = text[1:-1].strip()
|
| 74 |
|
| 75 |
-
# Remove
|
| 76 |
text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
|
| 77 |
|
| 78 |
-
#
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
if
|
| 82 |
-
text =
|
| 83 |
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
|
| 87 |
def is_placeholder_answer(text: str) -> bool:
|
| 88 |
"""
|
| 89 |
-
Detect placeholder/fallback outputs
|
| 90 |
"""
|
| 91 |
if text is None:
|
| 92 |
return True
|
| 93 |
|
| 94 |
-
normalized =
|
| 95 |
return normalized in {
|
| 96 |
"",
|
| 97 |
"placeholder",
|
| 98 |
"n/a",
|
| 99 |
"unknown",
|
| 100 |
-
}
|
|
|
|
| 1 |
+
# from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
# import re
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# FLUFF_LINES = {
|
| 7 |
+
# "i hope this helps",
|
| 8 |
+
# "hope this helps",
|
| 9 |
+
# "let me know if you need anything else",
|
| 10 |
+
# "thanks",
|
| 11 |
+
# }
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# def extract_final_answer(text: str) -> str:
|
| 15 |
+
# if text is None:
|
| 16 |
+
# return ""
|
| 17 |
+
|
| 18 |
+
# text = str(text).strip()
|
| 19 |
+
# if not text:
|
| 20 |
+
# return ""
|
| 21 |
+
|
| 22 |
+
# text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
|
| 23 |
+
# text = re.sub(r"\s*```$", "", text)
|
| 24 |
+
|
| 25 |
+
# # Strong preference: explicit final-answer style markers
|
| 26 |
+
# explicit_patterns = [
|
| 27 |
+
# r"(?is)\bfinal answer\s*:\s*(.+)$",
|
| 28 |
+
# r"(?is)\banswer\s*:\s*(.+)$",
|
| 29 |
+
# r"(?is)\bthe answer is\s*:\s*(.+)$",
|
| 30 |
+
# r"(?is)\bthe answer is\s+(.+)$",
|
| 31 |
+
# ]
|
| 32 |
+
# for pattern in explicit_patterns:
|
| 33 |
+
# match = re.search(pattern, text)
|
| 34 |
+
# if match:
|
| 35 |
+
# candidate = match.group(1).strip()
|
| 36 |
+
# candidate_lines = [line.strip() for line in candidate.splitlines() if line.strip()]
|
| 37 |
+
# if candidate_lines:
|
| 38 |
+
# return candidate_lines[0]
|
| 39 |
+
|
| 40 |
+
# lines = [line.strip() for line in text.splitlines() if line.strip()]
|
| 41 |
+
# if not lines:
|
| 42 |
+
# return ""
|
| 43 |
+
|
| 44 |
+
# # Prefer short non-fluff lines near the end
|
| 45 |
+
# for line in reversed(lines):
|
| 46 |
+
# normalized = normalize_basic_answer(line).lower()
|
| 47 |
+
# if normalized and normalized not in FLUFF_LINES and len(normalized) <= 200:
|
| 48 |
+
# return line
|
| 49 |
+
|
| 50 |
+
# return lines[-1]
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# def normalize_basic_answer(text: str) -> str:
|
| 54 |
+
# if text is None:
|
| 55 |
+
# return ""
|
| 56 |
+
|
| 57 |
+
# text = str(text).strip()
|
| 58 |
+
# if not text:
|
| 59 |
+
# return ""
|
| 60 |
+
|
| 61 |
+
# text = re.sub(r"\s+", " ", text).strip()
|
| 62 |
+
# text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
|
| 63 |
+
|
| 64 |
+
# if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
|
| 65 |
+
# text = text[1:-1].strip()
|
| 66 |
+
|
| 67 |
+
# if text.endswith(".") and not re.fullmatch(r"\d+\.\d+", text):
|
| 68 |
+
# text = text[:-1].strip()
|
| 69 |
+
|
| 70 |
+
# return text
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# def normalize_final_answer(question: str, text: str) -> str:
|
| 74 |
+
# text = normalize_basic_answer(text)
|
| 75 |
+
# if not text:
|
| 76 |
+
# return ""
|
| 77 |
+
|
| 78 |
+
# q = question.lower()
|
| 79 |
+
|
| 80 |
+
# # first name only
|
| 81 |
+
# if "give only the first name" in q or "first name only" in q:
|
| 82 |
+
# text = re.split(r"\s+", text.strip())[0]
|
| 83 |
+
|
| 84 |
+
# # last name only
|
| 85 |
+
# if "last names only" in q or "use their last names only" in q:
|
| 86 |
+
# parts = [part.strip() for part in text.split(",")]
|
| 87 |
+
# cleaned_parts = []
|
| 88 |
+
# for part in parts:
|
| 89 |
+
# tokens = part.split()
|
| 90 |
+
# cleaned_parts.append(tokens[-1] if tokens else part)
|
| 91 |
+
# text = ", ".join(cleaned_parts)
|
| 92 |
+
|
| 93 |
+
# # city only
|
| 94 |
+
# if "just give me the city name" in q or "city name without abbreviations" in q:
|
| 95 |
+
# text = re.split(r"[,;()\-]", text)[0].strip()
|
| 96 |
+
|
| 97 |
+
# # comma-delimited / comma separated list
|
| 98 |
+
# if "comma separated list" in q or "comma-delimited list" in q or "comma delimited list" in q:
|
| 99 |
+
# parts = [p.strip() for p in re.split(r",|\n", text) if p.strip()]
|
| 100 |
+
# text = ",".join(parts)
|
| 101 |
+
|
| 102 |
+
# # ascending order / alphabetical
|
| 103 |
+
# if "ascending order" in q:
|
| 104 |
+
# try:
|
| 105 |
+
# nums = [int(x.strip()) for x in text.split(",") if x.strip()]
|
| 106 |
+
# text = ",".join(str(n) for n in sorted(nums))
|
| 107 |
+
# except Exception:
|
| 108 |
+
# pass
|
| 109 |
+
|
| 110 |
+
# if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q:
|
| 111 |
+
# parts = [p.strip() for p in text.split(",") if p.strip()]
|
| 112 |
+
# if parts:
|
| 113 |
+
# text = ",".join(sorted(parts, key=lambda x: x.lower()))
|
| 114 |
+
|
| 115 |
+
# # two decimal places
|
| 116 |
+
# if "two decimal places" in q:
|
| 117 |
+
# number_match = re.search(r"-?\d+(?:\.\d+)?", text.replace(",", ""))
|
| 118 |
+
# if number_match:
|
| 119 |
+
# try:
|
| 120 |
+
# value = float(number_match.group(0))
|
| 121 |
+
# text = f"{value:.2f}"
|
| 122 |
+
# except Exception:
|
| 123 |
+
# pass
|
| 124 |
+
|
| 125 |
+
# # IOC code / abbreviations / codes often expected uppercase single token
|
| 126 |
+
# if "ioc country code" in q:
|
| 127 |
+
# text = text.strip().upper()
|
| 128 |
+
|
| 129 |
+
# # algebraic notation answer should be just one move token-like string
|
| 130 |
+
# if "algebraic notation" in q:
|
| 131 |
+
# text = text.strip().split()[0]
|
| 132 |
+
|
| 133 |
+
# return text
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# def is_placeholder_answer(text: str) -> bool:
|
| 137 |
+
# normalized = normalize_basic_answer(text).lower()
|
| 138 |
+
# return normalized in {"", "placeholder", "n/a", "unknown"}
|
| 139 |
+
|
| 140 |
+
|
| 141 |
from __future__ import annotations
|
| 142 |
|
| 143 |
import re
|
| 144 |
|
| 145 |
|
| 146 |
+
_FLUFF_LINES = {
|
| 147 |
+
"i hope this helps",
|
| 148 |
+
"hope this helps",
|
| 149 |
+
"let me know if you need anything else",
|
| 150 |
+
"thanks",
|
| 151 |
+
"thank you",
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
|
| 155 |
def extract_final_answer(text: str) -> str:
|
| 156 |
"""
|
| 157 |
Extract the most likely final answer from raw model output.
|
| 158 |
|
| 159 |
+
Strategy:
|
| 160 |
+
- prefer explicit markers like 'Final answer:'
|
| 161 |
+
- strip code fences
|
| 162 |
+
- if multiline, prefer a short meaningful line near the end
|
| 163 |
"""
|
| 164 |
if text is None:
|
| 165 |
return ""
|
|
|
|
| 168 |
if not text:
|
| 169 |
return ""
|
| 170 |
|
|
|
|
| 171 |
text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
|
| 172 |
text = re.sub(r"\s*```$", "", text)
|
| 173 |
|
| 174 |
+
explicit_patterns = [
|
| 175 |
+
r"(?is)\bfinal answer\s*:\s*(.+)$",
|
| 176 |
+
r"(?is)\banswer\s*:\s*(.+)$",
|
| 177 |
+
r"(?is)\bthe answer is\s*:\s*(.+)$",
|
| 178 |
+
r"(?is)\bthe answer is\s+(.+)$",
|
|
|
|
| 179 |
]
|
| 180 |
|
| 181 |
+
for pattern in explicit_patterns:
|
| 182 |
+
match = re.search(pattern, text)
|
| 183 |
+
if match:
|
| 184 |
+
candidate = match.group(1).strip()
|
| 185 |
+
candidate_lines = [line.strip() for line in candidate.splitlines() if line.strip()]
|
| 186 |
+
if candidate_lines:
|
| 187 |
+
return candidate_lines[0]
|
| 188 |
|
| 189 |
+
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
|
|
|
| 190 |
if not lines:
|
| 191 |
return ""
|
| 192 |
|
| 193 |
+
for line in reversed(lines):
|
| 194 |
+
normalized = normalize_basic_answer(line).lower()
|
| 195 |
+
if normalized and normalized not in _FLUFF_LINES and len(normalized) <= 200:
|
| 196 |
+
return line
|
| 197 |
|
| 198 |
return lines[-1]
|
| 199 |
|
| 200 |
|
| 201 |
+
def normalize_basic_answer(text: str) -> str:
|
| 202 |
"""
|
| 203 |
+
Basic cleanup independent of question format.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
"""
|
| 205 |
if text is None:
|
| 206 |
return ""
|
|
|
|
| 209 |
if not text:
|
| 210 |
return ""
|
| 211 |
|
|
|
|
| 212 |
text = re.sub(r"\s+", " ", text).strip()
|
| 213 |
+
text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
|
| 214 |
+
|
| 215 |
+
if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
|
| 216 |
+
text = text[1:-1].strip()
|
| 217 |
+
|
| 218 |
+
if text.endswith(".") and not re.fullmatch(r"-?\d+\.\d+", text):
|
| 219 |
+
text = text[:-1].strip()
|
| 220 |
+
|
| 221 |
+
return text
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def normalize_final_answer(*args: str) -> str:
    """
    Normalize an answer, applying formatting rules triggered by the question text.

    Backward-compatible call forms:
    - normalize_final_answer(text)
    - normalize_final_answer(question, text)

    Any other number of positional arguments returns "".

    The text is first cleaned with normalize_basic_answer(). Then, depending on
    phrases found in the lower-cased question, these rules are applied in order:
    first name only, last names only, surname, city name, IOC country code
    (upper-cased), algebraic notation (first token), comma-separated list
    (no spaces after commas), ascending order (integers), alphabetical order,
    two decimal places.

    Returns:
        The normalized answer, or "" when nothing usable remains.
    """
    if len(args) == 1:
        question, text = "", args[0]
    elif len(args) == 2:
        question, text = args
    else:
        return ""

    text = normalize_basic_answer(text)
    if not text:
        return ""

    q = (question or "").lower()

    # Remove outer labels once more, conservatively
    text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
    if not text:
        # A doubled label such as "Answer: Answer:" leaves nothing behind; bail
        # out here so the token-indexing rules below never see an empty string.
        return ""

    # first name only
    if "give only the first name" in q or "first name only" in q:
        tokens = text.split()
        if tokens:
            text = tokens[0]

    # last name only (per comma-separated person)
    if "last names only" in q:
        people = [part.strip() for part in text.split(",") if part.strip()]
        if people:
            text = ", ".join(person.split()[-1] for person in people)

    # surname only
    if "what is the surname" in q or "surname of" in q:
        tokens = text.split()
        if tokens:
            text = tokens[-1]

    # city only: keep the first non-empty segment before a delimiter, so a
    # leading delimiter such as "(Paris)" does not produce an empty answer.
    # NOTE(review): "-" is treated as a delimiter, so hyphenated city names are
    # truncated; confirm whether that is intended.
    if "city name without abbreviations" in q or "just give me the city name" in q:
        segments = [seg.strip() for seg in re.split(r"[,;()\-]", text) if seg.strip()]
        if segments:
            text = segments[0]

    # IOC code
    if "ioc country code" in q:
        text = text.strip().upper()

    # algebraic notation: a single move token
    if "algebraic notation" in q:
        tokens = text.split()
        if tokens:
            text = tokens[0]

    # comma-separated list formatting ("comma separated" also covers
    # "comma separated list")
    list_phrases = (
        "comma separated",
        "comma-separated list",
        "comma delimited list",
        "comma-delimited list",
    )
    if any(phrase in q for phrase in list_phrases):
        items = [p.strip() for p in re.split(r",|\n", text) if p.strip()]
        text = ",".join(items)

    # ascending order (only when every item is an integer)
    if "ascending order" in q:
        try:
            nums = [int(x.strip()) for x in text.split(",") if x.strip()]
        except ValueError:
            # Non-integer items: leave the answer as the model produced it.
            pass
        else:
            text = ",".join(str(n) for n in sorted(nums))

    # alphabetical order
    if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q:
        items = [p.strip() for p in text.split(",") if p.strip()]
        if items:
            text = ",".join(sorted(items, key=lambda x: x.lower()))

    # two decimal places: thousands separators are dropped before parsing
    if "two decimal places" in q:
        match = re.search(r"-?\d+(?:\.\d+)?", text.replace(",", ""))
        if match:
            text = f"{float(match.group(0)):.2f}"

    return text.strip()
|
| 320 |
|
| 321 |
|
| 322 |
def is_placeholder_answer(text: str) -> bool:
    """
    Report whether *text* is missing or only a known placeholder token.

    None counts as a placeholder. Otherwise the text is cleaned with
    normalize_basic_answer(), lower-cased, and compared against the known
    placeholder values ("", "placeholder", "n/a", "unknown").
    """
    if text is None:
        return True

    placeholders = ("", "placeholder", "n/a", "unknown")
    candidate = normalize_basic_answer(text).lower()
    return candidate in placeholders
|