Spaces:
Sleeping
Sleeping
| import re | |
| from typing import Optional | |
# Lower-case file extensions (leading dot included) grouped by the kind of
# attachment they represent.
IMAGE_SUFFIXES = {
    ".png",
    ".jpg",
    ".jpeg",
    ".webp",
    ".bmp",
    ".gif",
}

SPREADSHEET_SUFFIXES = {
    ".xlsx",
    ".xls",
    ".csv",
}

PYTHON_SUFFIXES = {
    ".py",
}

AUDIO_SUFFIXES = {
    ".mp3",
    ".wav",
    ".m4a",
    ".aac",
    ".flac",
    ".ogg",
    ".opus",
    ".webm",
}

# NOTE: ".csv" is listed here and in SPREADSHEET_SUFFIXES; which kind wins
# depends on the order in which a caller tests the sets.
TEXT_SUFFIXES = {
    ".txt",
    ".md",
    ".json",
    ".csv",
    ".tsv",
    ".html",
    ".htm",
}
def build_user_content(question: str, task_id: Optional[str]) -> str:
    """Build the user message sent for a task.

    Args:
        question: The task question. ``None`` is treated as an empty string
            so the literal text "None" never leaks into the prompt.
        task_id: Identifier of the task. When empty or ``None`` the question
            is returned unchanged.

    Returns:
        The question alone, or the question followed by the task id and an
        instruction to call ``download_task_file`` before using a
        file-type-specific tool.
    """
    # Same None-guard the other question helpers in this module apply.
    prompt = question or ""
    if not task_id:
        return prompt
    return (
        f"{prompt}\n\n"
        f"Task ID: {task_id}\n"
        "If this question has an attachment, call download_task_file with this task_id first. "
        "Then use the most specific follow-up tool for the downloaded file type."
    )
def classify_attachment(question: str, suffix: str) -> Optional[str]:
    """Guess the kind of attachment a task carries.

    The file suffix is checked first; only when it matches none of the known
    suffix sets is the question text scanned for keyword hints.

    Args:
        question: The task question; ``None`` is treated as empty.
        suffix: File extension of the attachment. Case, surrounding
            whitespace and a missing leading dot are tolerated
            (``"PNG"``, ``" .png "`` and ``"png"`` all mean ``".png"``).

    Returns:
        One of ``"image"``, ``"audio"``, ``"python"``, ``"spreadsheet"``,
        ``"text"``, or ``None`` when neither the suffix nor the question
        gives a hint.
    """
    ext = (suffix or "").strip().lower()
    # The suffix sets store extensions with a leading dot; add it when the
    # caller passed a bare extension so it is not silently ignored.
    if ext and not ext.startswith("."):
        ext = f".{ext}"

    # Order matters: ".csv" is in both the spreadsheet and text sets, and
    # the spreadsheet classification must win.
    suffix_groups = (
        (IMAGE_SUFFIXES, "image"),
        (AUDIO_SUFFIXES, "audio"),
        (PYTHON_SUFFIXES, "python"),
        (SPREADSHEET_SUFFIXES, "spreadsheet"),
        (TEXT_SUFFIXES, "text"),
    )
    for known_suffixes, kind in suffix_groups:
        if ext in known_suffixes:
            return kind

    # Fallback: plain substring hints in the lower-cased question. The first
    # group with any hit wins, so the group order is significant.
    text = (question or "").lower()
    keyword_groups = (
        ("image", ["image", "picture", "screenshot", "chess position", "visual", "diagram", "shown in"]),
        ("audio", ["audio", "recording", "mp3", "wav", "says", "say in response", "lecture"]),
        ("python", ["python code", "attached python", "numeric output", "run the attached python"]),
        ("spreadsheet", ["excel", "spreadsheet", "csv", "sales", "table contains"]),
        ("text", ["attached text", "text file", "read the attached", "document"]),
    )
    for kind, keywords in keyword_groups:
        if any(keyword in text for keyword in keywords):
            return kind
    return None
# Recognizes links to a YouTube video: youtube.com (optionally behind the
# www./m./music. subdomains) with a watch URL whose query carries a "v="
# parameter in any position, or a /shorts/, /embed/ or /live/ path, plus
# youtu.be short links. Scheme and host are matched case-insensitively.
_YOUTUBE_URL_RE = re.compile(
    r"https?://(?:(?:www|m|music)\.)?"
    r"(?:youtube\.com/(?:watch\?(?:[^\s#]*&)?v=|shorts/|embed/|live/)"
    r"|youtu\.be/)",
    re.IGNORECASE,
)


def is_youtube_question(question: str) -> bool:
    """Return True when the question text contains a YouTube video link.

    Args:
        question: The task question; ``None`` is treated as empty.

    Returns:
        ``True`` if a YouTube video URL occurs anywhere in the text,
        ``False`` otherwise.
    """
    return _YOUTUBE_URL_RE.search(question or "") is not None
def is_youtube_visual_question(question: str) -> bool:
    """Tell whether a YouTube question is about what is seen, not what is said.

    Args:
        question: The task question; ``None`` is treated as empty.

    Returns:
        ``True`` only when ``is_youtube_question`` accepts the question, the
        lower-cased text contains at least one visual marker, and it
        contains none of the speech markers.
    """
    if not is_youtube_question(question):
        return False

    lowered = (question or "").lower()

    # Substrings hinting that the answer must be read off the video frames.
    visual_markers = (
        "on camera",
        "visible",
        "shown",
        "see in the video",
        "highest number",
        "how many",
        "appears",
        "frame",
    )
    if not any(hint in lowered for hint in visual_markers):
        return False

    # Substrings hinting that the answer is in the spoken audio; any hit
    # vetoes the visual classification.
    speech_markers = (
        "what does",
        "say",
        "says",
        "spoken",
        "response",
        "transcript",
    )
    return not any(hint in lowered for hint in speech_markers)
def cleanup_exact_answer(raw_answer: str) -> str:
    """Reduce a model reply to the bare answer string.

    Steps, in order: strip whitespace, remove a surrounding Markdown code
    fence and stray backticks, drop a leading "Final answer:", "Answer:" or
    "The answer is" prefix (case-insensitive), and remove one trailing
    period.

    Args:
        raw_answer: The raw reply. ``None`` yields an empty string; any
            other value is converted with ``str`` first, so a numeric ``0``
            comes back as ``"0"``.

    Returns:
        The cleaned answer text.
    """
    # Only None means "no answer"; falsy values such as 0 are real answers.
    answer = "" if raw_answer is None else str(raw_answer)
    answer = answer.strip()

    # Opening fence: a word directly after the backticks is a language tag
    # only when it sits on its own line. Otherwise ("```42```") it is the
    # answer itself and must be kept.
    answer = re.sub(r"^```(?:\w+[ \t]*\r?\n)?\s*", "", answer)
    answer = re.sub(r"\s*```$", "", answer)
    answer = answer.strip().strip("`").strip()

    answer = re.sub(r"^(?:final answer|answer)\s*:\s*", "", answer, flags=re.IGNORECASE)
    answer = re.sub(r"^the answer is\s*:?\s*", "", answer, flags=re.IGNORECASE)

    # Drop a single sentence-ending period. A lone "." is left alone, and a
    # decimal such as "3.14" is untouched because it does not end with ".".
    if len(answer) > 1 and answer.endswith("."):
        answer = answer[:-1]
    return answer.strip()