import re
from typing import Optional


# File-extension groups (lower-case, with the leading dot) used to map an
# attachment's suffix to a handler category.
IMAGE_SUFFIXES = {
    ".bmp",
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".webp",
}
SPREADSHEET_SUFFIXES = {
    ".csv",
    ".xls",
    ".xlsx",
}
PYTHON_SUFFIXES = {
    ".py",
}
AUDIO_SUFFIXES = {
    ".aac",
    ".flac",
    ".m4a",
    ".mp3",
    ".ogg",
    ".opus",
    ".wav",
    ".webm",
}
# NOTE: ".csv" is listed here as well as in SPREADSHEET_SUFFIXES.
TEXT_SUFFIXES = {
    ".csv",
    ".htm",
    ".html",
    ".json",
    ".md",
    ".tsv",
    ".txt",
}


def build_user_content(question: str, task_id: Optional[str]) -> str:
    """Build the user message text for a question.

    Args:
        question: The question text.
        task_id: Identifier of the task, or a falsy value when there is none.

    Returns:
        ``question`` unchanged when ``task_id`` is falsy; otherwise the
        question followed by a blank line, a ``Task ID:`` line, and a hint
        telling the reader to call ``download_task_file`` with that id before
        using a file-type-specific tool.
    """
    if task_id:
        attachment_hint = (
            "If this question has an attachment, call download_task_file with this task_id first. "
            "Then use the most specific follow-up tool for the downloaded file type."
        )
        return f"{question}\n\nTask ID: {task_id}\n{attachment_hint}"

    return question


def classify_attachment(question: str, suffix: str) -> Optional[str]:
    """Guess the category of a task attachment.

    The file suffix is tried first; if it is not in any known suffix set, the
    question text is scanned for keyword substrings. Both inputs are
    lower-cased before comparison, and ``None``/empty values are treated as
    empty strings.

    Args:
        question: The question text used for the keyword fallback.
        suffix: The attachment's file suffix (compared as-is after
            lower-casing, so it must include the leading dot to match).

    Returns:
        One of ``"image"``, ``"audio"``, ``"python"``, ``"spreadsheet"``,
        ``"text"``, or ``None`` when neither the suffix nor the question
        matches anything.
    """
    ext = (suffix or "").lower()
    lowered_question = (question or "").lower()

    # Order matters: the first matching category wins (e.g. ".csv" is in both
    # the spreadsheet and text sets and resolves to "spreadsheet").
    suffix_table = (
        ("image", IMAGE_SUFFIXES),
        ("audio", AUDIO_SUFFIXES),
        ("python", PYTHON_SUFFIXES),
        ("spreadsheet", SPREADSHEET_SUFFIXES),
        ("text", TEXT_SUFFIXES),
    )
    for category, known_suffixes in suffix_table:
        if ext in known_suffixes:
            return category

    # Fallback: plain substring search in the question, same category order.
    keyword_table = (
        ("image", ("image", "picture", "screenshot", "chess position", "visual", "diagram", "shown in")),
        ("audio", ("audio", "recording", "mp3", "wav", "says", "say in response", "lecture")),
        ("python", ("python code", "attached python", "numeric output", "run the attached python")),
        ("spreadsheet", ("excel", "spreadsheet", "csv", "sales", "table contains")),
        ("text", ("attached text", "text file", "read the attached", "document")),
    )
    for category, keywords in keyword_table:
        for keyword in keywords:
            if keyword in lowered_question:
                return category

    return None


# Matches YouTube video links: youtube.com (bare, www., m. or music. host) with
# a watch URL whose query contains a "v" parameter in any position, or a
# /shorts/, /embed/ or /live/ path, plus youtu.be short links.
_YOUTUBE_URL_RE = re.compile(
    r"https?://(?:(?:www|m|music)\.)?"
    r"(?:youtube\.com/(?:watch\?(?:[^\s#]*&)?v=|shorts/|embed/|live/)|youtu\.be/)",
    re.IGNORECASE,  # scheme and host names are case-insensitive
)


def is_youtube_question(question: str) -> bool:
    """Return True when the question text contains a YouTube video URL.

    Args:
        question: The question text; ``None`` or empty is treated as no text.

    Returns:
        ``True`` if a ``youtube.com`` watch/shorts/embed/live link or a
        ``youtu.be`` link appears anywhere in the text, ``False`` otherwise.
    """
    return _YOUTUBE_URL_RE.search(question or "") is not None


def is_youtube_visual_question(question: str) -> bool:
    """Return True for a YouTube question that asks about what is seen.

    The question must contain a YouTube URL (per ``is_youtube_question``),
    contain at least one visual marker substring, and contain no speech
    marker substring. Matching is done on the lower-cased question text.

    Args:
        question: The question text; ``None`` or empty is treated as no text.

    Returns:
        ``True`` only when all three conditions above hold.
    """
    lowered = (question or "").lower()
    if not is_youtube_question(question):
        return False

    visual_markers = (
        "on camera",
        "visible",
        "shown",
        "see in the video",
        "highest number",
        "how many",
        "appears",
        "frame",
    )
    speech_markers = (
        "what does",
        "say",
        "says",
        "spoken",
        "response",
        "transcript",
    )

    # Any speech marker vetoes the visual classification.
    has_visual_marker = any(marker in lowered for marker in visual_markers)
    if not has_visual_marker:
        return False
    mentions_speech = any(marker in lowered for marker in speech_markers)
    return not mentions_speech


def cleanup_exact_answer(raw_answer: str) -> str:
    """Normalize a raw model answer into a bare exact-match string.

    Steps, in order: convert to ``str`` and trim; remove a surrounding
    triple-backtick fence (with an optional language tag on the opening fence
    line); strip surrounding backticks; drop a leading ``Final answer:`` /
    ``Answer:`` / ``The answer is`` prefix (case-insensitive); strip
    surrounding backticks again; drop one trailing period from answers longer
    than one character.

    Args:
        raw_answer: The raw answer. ``None`` yields an empty string; any other
            value is converted with ``str()``.

    Returns:
        The cleaned answer text.
    """
    # Only None means "no answer": falsy values such as 0 are real answers and
    # must not be collapsed to the empty string.
    answer = "" if raw_answer is None else str(raw_answer).strip()

    # A language tag is only recognised when it ends the opening fence line;
    # otherwise a one-line fenced answer like ```42``` would lose its content.
    answer = re.sub(r"^```(?:[\w+#.-]*[ \t]*\r?\n)?\s*", "", answer)
    answer = re.sub(r"\s*```$", "", answer)
    answer = answer.strip().strip("`").strip()

    answer = re.sub(r"^(?:final answer|answer)\s*:\s*", "", answer, flags=re.IGNORECASE)
    answer = re.sub(r"^the answer is\s*:?\s*", "", answer, flags=re.IGNORECASE)
    # The value after a prefix may itself be wrapped in inline backticks.
    answer = answer.strip().strip("`").strip()

    # Drop a sentence-ending period; a lone "." is left untouched.
    if len(answer) > 1 and answer.endswith("."):
        answer = answer[:-1]

    return answer.strip()