Final_Assignment

Sleeping

App Files Files Community

Final_Assignment / agent_helpers.py

kenqia

feat: strengthen evidence-based GAIA agent

f55fed4 20 days ago

raw

history blame contribute delete

3.1 kB

	import re
	from typing import Optional


	# Lowercase file extensions (leading dot included), grouped by the kind of
	# attachment they indicate.
	IMAGE_SUFFIXES = {
	    ".png",
	    ".jpg",
	    ".jpeg",
	    ".webp",
	    ".bmp",
	    ".gif",
	}
	# NOTE: ".csv" is listed here and in TEXT_SUFFIXES as well.
	SPREADSHEET_SUFFIXES = {
	    ".xlsx",
	    ".xls",
	    ".csv",
	}
	PYTHON_SUFFIXES = {
	    ".py",
	}
	AUDIO_SUFFIXES = {
	    ".mp3",
	    ".wav",
	    ".m4a",
	    ".aac",
	    ".flac",
	    ".ogg",
	    ".opus",
	    ".webm",
	}
	TEXT_SUFFIXES = {
	    ".txt",
	    ".md",
	    ".json",
	    ".csv",
	    ".tsv",
	    ".html",
	    ".htm",
	}


	def build_user_content(question: str, task_id: Optional[str]) -> str:
	    """Build the user message text for a question.

	    Args:
	        question: The question text.
	        task_id: Identifier of the task, or a falsy value when there is none.

	    Returns:
	        ``question`` unchanged when ``task_id`` is falsy. Otherwise the
	        question followed by a blank line, a ``Task ID: ...`` line, and a
	        fixed hint telling the reader to call ``download_task_file`` with
	        that id before using a file-type-specific tool.
	    """
	    if task_id:
	        attachment_hint = (
	            "If this question has an attachment, call download_task_file with this task_id first. "
	            "Then use the most specific follow-up tool for the downloaded file type."
	        )
	        header = f"{question}\n\n" + f"Task ID: {task_id}\n"
	        return header + attachment_hint

	    return question


	def classify_attachment(question: str, suffix: str) -> Optional[str]:
	    """Guess the attachment kind from a file suffix, then from the question.

	    The lowercased ``suffix`` is looked up in the module-level suffix sets
	    first. If no set contains it, the lowercased ``question`` is scanned for
	    keyword substrings. In both passes the first matching rule wins, in the
	    order image, audio, python, spreadsheet, text.

	    Args:
	        question: The question text; ``None`` or empty is treated as "".
	        suffix: The attachment's file suffix; ``None`` or empty is treated
	            as "".

	    Returns:
	        One of ``"image"``, ``"audio"``, ``"python"``, ``"spreadsheet"``,
	        ``"text"``, or ``None`` when nothing matches.
	    """
	    ext = (suffix or "").lower()
	    text = (question or "").lower()

	    # Pass 1: exact suffix membership. Spreadsheet is tested before text, so
	    # a suffix present in both sets resolves to "spreadsheet".
	    suffix_rules = (
	        ("image", IMAGE_SUFFIXES),
	        ("audio", AUDIO_SUFFIXES),
	        ("python", PYTHON_SUFFIXES),
	        ("spreadsheet", SPREADSHEET_SUFFIXES),
	        ("text", TEXT_SUFFIXES),
	    )
	    for label, extensions in suffix_rules:
	        if ext in extensions:
	            return label

	    # Pass 2: substring heuristics on the question wording.
	    keyword_rules = (
	        ("image", ["image", "picture", "screenshot", "chess position", "visual", "diagram", "shown in"]),
	        ("audio", ["audio", "recording", "mp3", "wav", "says", "say in response", "lecture"]),
	        ("python", ["python code", "attached python", "numeric output", "run the attached python"]),
	        ("spreadsheet", ["excel", "spreadsheet", "csv", "sales", "table contains"]),
	        ("text", ["attached text", "text file", "read the attached", "document"]),
	    )
	    for label, keywords in keyword_rules:
	        for keyword in keywords:
	            if keyword in text:
	                return label

	    return None


	def is_youtube_question(question: str) -> bool:
	    """Return True when the question contains a YouTube video URL.

	    Two URL shapes are recognised, with an ``http`` or ``https`` scheme:
	    ``youtube.com/watch?v=`` (optionally prefixed by ``www.``) and
	    ``youtu.be/``.

	    Args:
	        question: The question text; ``None`` or empty is treated as "".

	    Returns:
	        ``True`` if either URL shape occurs anywhere in the text.
	    """
	    # The two hosts are alternatives, so the separator must be a bare "|".
	    # An escaped "\|" would match a literal pipe character and never fire
	    # on a real URL.
	    pattern = r"https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)"
	    return bool(re.search(pattern, question or ""))


	def is_youtube_visual_question(question: str) -> bool:
	    """Return True for a YouTube question that asks about what is seen.

	    The question must pass ``is_youtube_question``. It then counts as
	    visual when its lowercased text contains at least one visual marker
	    substring and none of the speech marker substrings.

	    Args:
	        question: The question text; ``None`` or empty is treated as "".

	    Returns:
	        ``True`` only when the question has a YouTube URL, a visual marker,
	        and no speech marker; ``False`` otherwise.
	    """
	    lowered = (question or "").lower()

	    if is_youtube_question(question):
	        visual_cues = (
	            "on camera",
	            "visible",
	            "shown",
	            "see in the video",
	            "highest number",
	            "how many",
	            "appears",
	            "frame",
	        )
	        speech_cues = (
	            "what does",
	            "say",
	            "says",
	            "spoken",
	            "response",
	            "transcript",
	        )

	        mentions_visual = any(cue in lowered for cue in visual_cues)
	        mentions_speech = any(cue in lowered for cue in speech_cues)
	        # A speech cue overrides any visual cue.
	        return mentions_visual and not mentions_speech

	    return False


	def cleanup_exact_answer(raw_answer: str) -> str:
	    """Reduce a raw model reply to the bare answer string.

	    Steps, in order:
	      1. Convert to ``str`` (``None`` becomes "") and trim whitespace.
	      2. Drop a leading code fence (with optional language tag) and a
	         trailing code fence, then any surrounding backticks.
	      3. Drop a leading "final answer:" / "answer:" label (case-insensitive).
	      4. Drop a leading "the answer is" phrase, with optional colon.
	      5. Drop a single trailing period when the answer is longer than one
	         character.

	    Args:
	        raw_answer: The reply to clean. Non-string values are stringified.

	    Returns:
	        The cleaned answer, stripped of surrounding whitespace.
	    """
	    # Only None maps to ""; `raw_answer or ""` would also erase a
	    # legitimate numeric answer of 0.
	    answer = ("" if raw_answer is None else str(raw_answer)).strip()

	    answer = re.sub(r"^```(?:\w+)?\s*", "", answer)
	    answer = re.sub(r"\s*```$", "", answer)
	    answer = answer.strip().strip("`").strip()

	    # "|" must be a bare alternation and the whitespace must be optional
	    # (\s*), otherwise common forms such as "Final Answer: 42" are not
	    # recognised.
	    answer = re.sub(r"^(?:final answer|answer)\s*:\s*", "", answer, flags=re.IGNORECASE)
	    # \b keeps words that merely start with "is" (e.g. "issue") intact.
	    answer = re.sub(r"^the answer is\b\s*:?\s*", "", answer, flags=re.IGNORECASE)

	    # A lone "." is kept as-is; otherwise one trailing period is removed.
	    if len(answer) > 1 and answer.endswith("."):
	        answer = answer[:-1]

	    return answer.strip()