"""Helpers for building prompts, classifying attachments and tidying answers."""

import re
from typing import Optional

IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
SPREADSHEET_SUFFIXES = {".xlsx", ".xls", ".csv"}
PYTHON_SUFFIXES = {".py"}
AUDIO_SUFFIXES = {".mp3", ".wav", ".m4a", ".aac", ".flac", ".ogg", ".opus", ".webm"}
# NOTE: ".csv" is also listed in SPREADSHEET_SUFFIXES; classify_attachment
# checks spreadsheets first, so a ".csv" file is reported as "spreadsheet".
TEXT_SUFFIXES = {".txt", ".md", ".json", ".csv", ".tsv", ".html", ".htm"}

# Question keywords used when the suffix is inconclusive. Order matters: the
# first kind with a matching keyword wins. Matching is by plain substring.
_KEYWORD_KINDS = (
    ("image", ("image", "picture", "screenshot", "chess position", "visual", "diagram", "shown in")),
    ("audio", ("audio", "recording", "mp3", "wav", "says", "say in response", "lecture")),
    ("python", ("python code", "attached python", "numeric output", "run the attached python")),
    ("spreadsheet", ("excel", "spreadsheet", "csv", "sales", "table contains")),
    ("text", ("attached text", "text file", "read the attached", "document")),
)

_YOUTUBE_URL_RE = re.compile(r"https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)")

_VISUAL_MARKERS = (
    "on camera",
    "visible",
    "shown",
    "see in the video",
    "highest number",
    "how many",
    "appears",
    "frame",
)
_SPEECH_MARKERS = (
    "what does",
    "say",
    "says",
    "spoken",
    "response",
    "transcript",
)

# Opening code fence. A word right after the backticks is treated as a
# language tag only when a line break follows it; otherwise a single-line
# fenced answer such as "```42```" would lose its content to the tag match.
_FENCE_OPEN_RE = re.compile(r"^```(?:\w+[ \t]*\r?\n)?\s*")
_FENCE_CLOSE_RE = re.compile(r"\s*```$")
_ANSWER_LABEL_RE = re.compile(r"^(?:final answer|answer)\s*:\s*", re.IGNORECASE)
_ANSWER_IS_RE = re.compile(r"^the answer is\s*:?\s*", re.IGNORECASE)


def build_user_content(question: str, task_id: Optional[str]) -> str:
    """Return the user message for *question*, adding attachment instructions.

    When *task_id* is falsy the question is returned unchanged. Otherwise the
    task id and a hint to call ``download_task_file`` are appended.
    """
    if not task_id:
        return question
    return (
        f"{question}\n\n"
        f"Task ID: {task_id}\n"
        "If this question has an attachment, call download_task_file with this task_id first. "
        "Then use the most specific follow-up tool for the downloaded file type."
    )


def classify_attachment(question: str, suffix: str) -> Optional[str]:
    """Guess the attachment kind from the file suffix, then from the question.

    Returns one of ``"image"``, ``"audio"``, ``"python"``, ``"spreadsheet"``,
    ``"text"``, or ``None`` when neither the suffix nor any keyword matches.
    Both arguments may be ``None`` or empty; comparison is case-insensitive.
    """
    suffix = (suffix or "").lower()
    lowered = (question or "").lower()

    # A recognised suffix is authoritative and takes precedence over keywords.
    suffix_kinds = (
        ("image", IMAGE_SUFFIXES),
        ("audio", AUDIO_SUFFIXES),
        ("python", PYTHON_SUFFIXES),
        ("spreadsheet", SPREADSHEET_SUFFIXES),
        ("text", TEXT_SUFFIXES),
    )
    for kind, suffixes in suffix_kinds:
        if suffix in suffixes:
            return kind

    for kind, keywords in _KEYWORD_KINDS:
        if any(keyword in lowered for keyword in keywords):
            return kind
    return None


def is_youtube_question(question: str) -> bool:
    """Return True if *question* contains a YouTube watch or youtu.be URL."""
    return bool(_YOUTUBE_URL_RE.search(question or ""))


def is_youtube_visual_question(question: str) -> bool:
    """Return True for YouTube questions that look visual rather than spoken.

    The question must contain a YouTube URL, at least one visual marker, and
    no speech marker. Markers are matched as lower-cased substrings.
    """
    if not is_youtube_question(question):
        return False
    lowered = (question or "").lower()
    has_visual = any(marker in lowered for marker in _VISUAL_MARKERS)
    has_speech = any(marker in lowered for marker in _SPEECH_MARKERS)
    return has_visual and not has_speech


def cleanup_exact_answer(raw_answer: str) -> str:
    """Strip code fences, answer prefixes and a trailing period from an answer.

    ``None`` yields an empty string. Any other non-string value is converted
    with ``str()``, so a numeric ``0`` becomes ``"0"`` instead of being
    dropped as falsy.
    """
    answer = "" if raw_answer is None else str(raw_answer)
    answer = answer.strip()

    answer = _FENCE_OPEN_RE.sub("", answer)
    answer = _FENCE_CLOSE_RE.sub("", answer)
    answer = answer.strip().strip("`").strip()

    answer = _ANSWER_LABEL_RE.sub("", answer)
    answer = _ANSWER_IS_RE.sub("", answer)

    # Drop one sentence-ending period. No decimal guard is needed: a string
    # that ends in "." cannot end in a digit, so "3.5" is never touched and
    # "3.5." becomes "3.5". A lone "." is kept as-is.
    if len(answer) > 1 and answer.endswith("."):
        answer = answer[:-1]
    return answer.strip()