Final_Assignment_Template

Sleeping

File size: 10,190 Bytes

# from __future__ import annotations

# import re


# FLUFF_LINES = {
#     "i hope this helps",
#     "hope this helps",
#     "let me know if you need anything else",
#     "thanks",
# }


# def extract_final_answer(text: str) -> str:
#     if text is None:
#         return ""

#     text = str(text).strip()
#     if not text:
#         return ""

#     text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
#     text = re.sub(r"\s*```$", "", text)

#     # Strong preference: explicit final-answer style markers
#     explicit_patterns = [
#         r"(?is)\bfinal answer\s*:\s*(.+)$",
#         r"(?is)\banswer\s*:\s*(.+)$",
#         r"(?is)\bthe answer is\s*:\s*(.+)$",
#         r"(?is)\bthe answer is\s+(.+)$",
#     ]
#     for pattern in explicit_patterns:
#         match = re.search(pattern, text)
#         if match:
#             candidate = match.group(1).strip()
#             candidate_lines = [line.strip() for line in candidate.splitlines() if line.strip()]
#             if candidate_lines:
#                 return candidate_lines[0]

#     lines = [line.strip() for line in text.splitlines() if line.strip()]
#     if not lines:
#         return ""

#     # Prefer short non-fluff lines near the end
#     for line in reversed(lines):
#         normalized = normalize_basic_answer(line).lower()
#         if normalized and normalized not in FLUFF_LINES and len(normalized) <= 200:
#             return line

#     return lines[-1]


# def normalize_basic_answer(text: str) -> str:
#     if text is None:
#         return ""

#     text = str(text).strip()
#     if not text:
#         return ""

#     text = re.sub(r"\s+", " ", text).strip()
#     text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()

#     if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
#         text = text[1:-1].strip()

#     if text.endswith(".") and not re.fullmatch(r"\d+\.\d+", text):
#         text = text[:-1].strip()

#     return text


# def normalize_final_answer(question: str, text: str) -> str:
#     text = normalize_basic_answer(text)
#     if not text:
#         return ""

#     q = question.lower()

#     # first name only
#     if "give only the first name" in q or "first name only" in q:
#         text = re.split(r"\s+", text.strip())[0]

#     # last name only
#     if "last names only" in q or "use their last names only" in q:
#         parts = [part.strip() for part in text.split(",")]
#         cleaned_parts = []
#         for part in parts:
#             tokens = part.split()
#             cleaned_parts.append(tokens[-1] if tokens else part)
#         text = ", ".join(cleaned_parts)

#     # city only
#     if "just give me the city name" in q or "city name without abbreviations" in q:
#         text = re.split(r"[,;()\-]", text)[0].strip()

#     # comma-delimited / comma separated list
#     if "comma separated list" in q or "comma-delimited list" in q or "comma delimited list" in q:
#         parts = [p.strip() for p in re.split(r",|\n", text) if p.strip()]
#         text = ",".join(parts)

#     # ascending order / alphabetical
#     if "ascending order" in q:
#         try:
#             nums = [int(x.strip()) for x in text.split(",") if x.strip()]
#             text = ",".join(str(n) for n in sorted(nums))
#         except Exception:
#             pass

#     if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q:
#         parts = [p.strip() for p in text.split(",") if p.strip()]
#         if parts:
#             text = ",".join(sorted(parts, key=lambda x: x.lower()))

#     # two decimal places
#     if "two decimal places" in q:
#         number_match = re.search(r"-?\d+(?:\.\d+)?", text.replace(",", ""))
#         if number_match:
#             try:
#                 value = float(number_match.group(0))
#                 text = f"{value:.2f}"
#             except Exception:
#                 pass

#     # IOC code / abbreviations / codes often expected uppercase single token
#     if "ioc country code" in q:
#         text = text.strip().upper()

#     # algebraic notation answer should be just one move token-like string
#     if "algebraic notation" in q:
#         text = text.strip().split()[0]

#     return text


# def is_placeholder_answer(text: str) -> bool:
#     normalized = normalize_basic_answer(text).lower()
#     return normalized in {"", "placeholder", "n/a", "unknown"}


from __future__ import annotations

import re


_FLUFF_LINES = {
    "i hope this helps",
    "hope this helps",
    "let me know if you need anything else",
    "thanks",
    "thank you",
}


def extract_final_answer(text: str) -> str:
    """
    Extract the most likely final answer from raw model output.

    Strategy:
    - prefer explicit markers like 'Final answer:'
    - strip code fences
    - if multiline, prefer a short meaningful line near the end
    """
    if text is None:
        return ""

    text = str(text).strip()
    if not text:
        return ""

    text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
    text = re.sub(r"\s*```$", "", text)

    explicit_patterns = [
        r"(?is)\bfinal answer\s*:\s*(.+)$",
        r"(?is)\banswer\s*:\s*(.+)$",
        r"(?is)\bthe answer is\s*:\s*(.+)$",
        r"(?is)\bthe answer is\s+(.+)$",
    ]

    for pattern in explicit_patterns:
        match = re.search(pattern, text)
        if match:
            candidate = match.group(1).strip()
            candidate_lines = [line.strip() for line in candidate.splitlines() if line.strip()]
            if candidate_lines:
                return candidate_lines[0]

    lines = [line.strip() for line in text.splitlines() if line.strip()]
    if not lines:
        return ""

    for line in reversed(lines):
        normalized = normalize_basic_answer(line).lower()
        if normalized and normalized not in _FLUFF_LINES and len(normalized) <= 200:
            return line

    return lines[-1]


def normalize_basic_answer(text: str) -> str:
    """
    Basic cleanup independent of question format.
    """
    if text is None:
        return ""

    text = str(text).strip()
    if not text:
        return ""

    text = re.sub(r"\s+", " ", text).strip()
    text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()

    if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
        text = text[1:-1].strip()

    if text.endswith(".") and not re.fullmatch(r"-?\d+\.\d+", text):
        text = text[:-1].strip()

    return text


def normalize_final_answer(*args: str) -> str:
    """
    Backward-compatible normalizer.

    Supports:
    - normalize_final_answer(text)
    - normalize_final_answer(question, text)
    """
    if len(args) == 1:
        question = ""
        text = args[0]
    elif len(args) == 2:
        question, text = args
    else:
        return ""

    text = normalize_basic_answer(text)
    if not text:
        return ""

    q = (question or "").lower()

    # Remove outer labels once more, conservatively
    text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()

    # first name only
    if "give only the first name" in q or "first name only" in q:
        tokens = text.split()
        if tokens:
            text = tokens[0]

    # last name only
    if "last names only" in q or "use their last names only" in q:
        parts = [part.strip() for part in text.split(",") if part.strip()]
        if parts:
            cleaned_parts: list[str] = []
            for part in parts:
                tokens = part.split()
                cleaned_parts.append(tokens[-1] if tokens else part)
            text = ", ".join(cleaned_parts)

    # surname only
    if "what is the surname" in q or "surname of" in q:
        tokens = text.split()
        if tokens:
            text = tokens[-1]

    # city only
    if "city name without abbreviations" in q or "just give me the city name" in q:
        text = re.split(r"[,;()\-]", text)[0].strip()

    # IOC code
    if "ioc country code" in q:
        text = text.strip().upper()

    # algebraic notation
    if "algebraic notation" in q:
        text = text.strip().split()[0]

    # comma-separated list formatting
    if (
        "comma separated list" in q
        or "comma-separated list" in q
        or "comma delimited list" in q
        or "comma-delimited list" in q
        or "comma separated" in q
    ):
        parts = [p.strip() for p in re.split(r",|\n", text) if p.strip()]
        text = ",".join(parts)

    # ascending order
    if "ascending order" in q:
        try:
            nums = [int(x.strip()) for x in text.split(",") if x.strip()]
            text = ",".join(str(n) for n in sorted(nums))
        except Exception:
            pass

    # alphabetical order
    if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q:
        parts = [p.strip() for p in text.split(",") if p.strip()]
        if parts:
            text = ",".join(sorted(parts, key=lambda x: x.lower()))

    # two decimal places
    if "two decimal places" in q:
        compact = text.replace(",", "")
        match = re.search(r"-?\d+(?:\.\d+)?", compact)
        if match:
            try:
                value = float(match.group(0))
                text = f"{value:.2f}"
            except Exception:
                pass
    if "nasa award number" in q:
        text = text.replace("NASA award number", "").strip()

    if "city name without abbreviations" in q:
        text = text.replace("St. Petersburg", "Saint Petersburg").strip()

    if "use their last names only" in q:
        parts = [p.strip() for p in text.split(",") if p.strip()]
        last_names = []
        for part in parts:
            tokens = part.split()
            if tokens:
                last_names.append(tokens[-1])
        if last_names:
            text = ",".join(last_names)

    return text.strip()


def is_placeholder_answer(text: str) -> bool:
    """
    Detect placeholder/fallback outputs.
    """
    if text is None:
        return True

    normalized = normalize_basic_answer(text).lower()
    return normalized in {
        "",
        "placeholder",
        "n/a",
        "unknown",
    }