Final_Assignment_Template1

Sleeping

Raj

Upload 2 files

d2c906a verified 11 days ago

10.2 kB

	"""GAIA agent: smolagents CodeAgent + HF Inference Providers backend.

	Pipeline per question:
	1. Eager-download `/files/{task_id}` so attachments are never missed.
	2. Run the CodeAgent N times (self-consistency).
	3. Majority-vote across normalized candidate answers.
	4. Verifier LLM pass: re-asks the model to pick the best answer in the
	canonical GAIA `FINAL ANSWER:` format given the candidates.
	5. Final string normalization for exact-match.
	"""
	from __future__ import annotations

	import os
	import re
	from collections import Counter
	from typing import Optional

	from huggingface_hub import InferenceClient
	from smolagents import CodeAgent, InferenceClientModel

	import config
	from tools import (
	analyze_image,
	download_task_file,
	read_table,
	read_webpage,
	transcribe_audio,
	web_search,
	wikipedia_search,
	youtube_transcript,
	)

	SYSTEM_PROMPT = """You are a general AI assistant solving questions from the GAIA benchmark.

	I will ask you a question. Think step by step using the tools available. When you
	are confident, finish your answer with the following template:

	FINAL ANSWER: [YOUR FINAL ANSWER]

	YOUR FINAL ANSWER must obey these rules exactly:
	- It is a number, OR as few words as possible, OR a comma separated list of
	numbers and/or strings.
	- If a number: digits only. No commas inside numbers. No units ($, %, kg, etc.)
	unless the question explicitly asks for them.
	- If a string: no articles (a, an, the), no abbreviations (write "Saint Louis"
	not "St. Louis", "New York City" not "NYC"), and digits in plain text unless
	the question asks for numerals.
	- If a comma separated list: items separated by ", " (comma + single space),
	in the order requested, each item following the number/string rules above.
	- Do NOT include explanations, units, quotes, or trailing punctuation in the
	FINAL ANSWER line itself.

	Tool playbook:
	- If the question mentions an attached file ("the attached", "this Excel /
	CSV / audio / image / Python file", "the file"), call
	`download_task_file(task_id)` FIRST to get a local path.
	- Spreadsheets / CSV: `read_table(path)` then pandas in Python.
	- Audio: `transcribe_audio(path)`.
	- Image: `analyze_image(path, question)` — phrase the question precisely.
	- YouTube link: `youtube_transcript(url)`.
	- Open web: `web_search(query)` then `read_webpage(url)` on the best hit.
	- Encyclopedic facts: `wikipedia_search(topic)`.
	- Use Python freely for math, parsing, sorting, set operations, etc.

	Self-check before answering:
	- Did you answer the literal question asked? (Not a related one.)
	- Does the format match the rules above?
	- If unsure, do one more verification search.
	"""

	VERIFIER_SYSTEM = """You are a strict GAIA answer formatter.

	You will see a GAIA question and several candidate answers produced by an agent.
	Pick the single best answer and reformat it to obey the GAIA answer format:

	- A number, a short string, or a comma-separated list.
	- Numbers: digits only, no commas, no units unless the question requires them.
	- Strings: no articles, no abbreviations, plain-text digits unless asked.
	- Comma-separated lists: ", " between items, in the order requested.
	- No explanations. No quotes. No trailing punctuation.

	Respond on a SINGLE LINE in exactly this template:
	FINAL ANSWER: <the answer>
	"""


	class GaiaAgent:
	"""CodeAgent + self-consistency + verifier."""

	def __init__(self, model_id: Optional[str] = None, provider: Optional[str] = None):
	token = os.getenv("HF_TOKEN")
	if not token:
	raise RuntimeError(
	"HF_TOKEN is not set. Add it as a Space secret to use HF Inference."
	)
	self._token = token
	self._model_id = model_id or config.AGENT_MODEL_ID
	self._provider = provider or config.AGENT_PROVIDER
	self.model = InferenceClientModel(
	model_id=self._model_id,
	provider=self._provider,
	token=token,
	max_tokens=config.AGENT_MAX_TOKENS,
	)
	self.tools = [
	web_search,
	read_webpage,
	wikipedia_search,
	youtube_transcript,
	download_task_file,
	read_table,
	transcribe_audio,
	analyze_image,
	]
	self._verifier = InferenceClient(token=token, provider=self._provider)
	self._n = config.SELF_CONSISTENCY_N

	# -------------------- public --------------------
	def __call__(self, question: str, task_id: Optional[str] = None) -> str:
	# 1) Pre-download attachment.
	attachment_line = ""
	if task_id:
	try:
	path = download_task_file(task_id)
	if path and not path.startswith(
	("NO_FILE", "Download error", "Download failed")
	):
	attachment_line = (
	f"An attached file for this task has already been "
	f"downloaded to: {path}\nUse the right tool on this "
	f"path (read_table for spreadsheets/CSV, "
	f"transcribe_audio for audio, analyze_image for "
	f"images, or open it with Python).\n"
	)
	except Exception as e:
	print(f" pre-download skipped: {e}")

	# 2) Self-consistency: N independent CodeAgent runs.
	candidates: list[str] = []
	last_error: Optional[str] = None
	for i in range(self._n):
	try:
	raw = self._single_run(question, task_id, attachment_line)
	except Exception as e:
	last_error = f"{type(e).__name__}: {e}"
	print(f" attempt {i + 1} errored: {last_error}")
	continue
	norm = self._normalize(str(raw))
	print(f" attempt {i + 1}: {norm!r}")
	if norm and not norm.startswith("AGENT ERROR"):
	candidates.append(norm)

	if not candidates:
	return f"AGENT ERROR: all {self._n} attempts failed. Last error: {last_error}"

	# 3) Majority vote on normalized strings (case-insensitive bucket).
	voted = self._vote(candidates)

	# 4) Verifier pass — pick + reformat using the LLM.
	try:
	verified = self._verify(question, candidates, voted)
	except Exception as e:
	print(f" verifier errored, falling back to vote: {e}")
	verified = voted

	return self._normalize(verified)

	# -------------------- helpers --------------------
	def _build_agent(self) -> CodeAgent:
	kwargs = dict(
	tools=self.tools,
	model=self.model,
	max_steps=config.MAX_STEPS,
	additional_authorized_imports=[
	"pandas", "numpy", "json", "re", "math", "statistics",
	"itertools", "collections", "datetime", "csv", "io",
	"pathlib", "string", "base64", "urllib", "unicodedata",
	],
	)
	if config.PLANNING_INTERVAL > 0:
	try:
	return CodeAgent(planning_interval=config.PLANNING_INTERVAL, **kwargs)
	except TypeError:
	pass
	return CodeAgent(**kwargs)

	def _single_run(
	self,
	question: str,
	task_id: Optional[str],
	attachment_line: str,
	) -> str:
	agent = self._build_agent()
	prompt = SYSTEM_PROMPT
	if task_id:
	prompt += f"\nThe current task_id is: {task_id}\n"
	if attachment_line:
	prompt += attachment_line
	prompt += f"\nQuestion:\n{question}\n"
	return agent.run(prompt)

	@staticmethod
	def _vote(candidates: list[str]) -> str:
	"""Majority-vote with case-insensitive bucketing; ties → first seen."""
	buckets: dict[str, list[str]] = {}
	for c in candidates:
	key = c.lower().strip()
	buckets.setdefault(key, []).append(c)
	ranked = sorted(
	buckets.items(),
	key=lambda kv: (-len(kv[1]), candidates.index(kv[1][0])),
	)
	return ranked[0][1][0]

	def _verify(
	self,
	question: str,
	candidates: list[str],
	voted: str,
	) -> str:
	"""Ask the LLM to choose + reformat the best candidate."""
	cand_block = "\n".join(f"- {c}" for c in candidates)
	user = (
	f"Question:\n{question}\n\n"
	f"Candidate answers from independent attempts:\n{cand_block}\n\n"
	f"Most common candidate: {voted}\n\n"
	f"Return the best final answer in the canonical GAIA format."
	)
	resp = self._verifier.chat.completions.create(
	model=self._model_id,
	messages=[
	{"role": "system", "content": VERIFIER_SYSTEM},
	{"role": "user", "content": user},
	],
	max_tokens=256,
	temperature=0.0,
	)
	text = (resp.choices[0].message.content or "").strip()
	m = re.search(r"final answer\s[:\-]\s(.+)", text, flags=re.IGNORECASE)
	return m.group(1).strip() if m else text

	@staticmethod
	def _normalize(text: str) -> str:
	"""Strip every common LLM cruft pattern so exact-match has a chance."""
	if not text:
	return text
	t = str(text).strip()

	# 1) If the model emitted "FINAL ANSWER: X" anywhere, keep only X.
	m = re.search(r"final answer\s[:\-]\s(.+)", t, flags=re.IGNORECASE \| re.DOTALL)
	if m:
	t = m.group(1).strip()
	t = t.split("\n")[0].strip()

	# 2) Strip wrapping markdown bold/italic/code.
	t = re.sub(r"^[_`]+\|[_`]+$", "", t).strip()

	# 3) Strip wrapping quotes (incl. smart quotes).
	if len(t) >= 2 and t[0] == t[-1] and t[0] in "\"'`“”‘’":
	t = t[1:-1].strip()

	# 4) Drop a single trailing period if the answer is short (not a sentence).
	if t.count(" ") < 6:
	t = t.rstrip(".")

	# 5) Collapse internal whitespace.
	t = re.sub(r"[ \t]+", " ", t).strip()

	return t