abhi1294's picture
Fix prompts and utils
9b6ba86
# from __future__ import annotations
# import re
# FLUFF_LINES = {
# "i hope this helps",
# "hope this helps",
# "let me know if you need anything else",
# "thanks",
# }
# def extract_final_answer(text: str) -> str:
# if text is None:
# return ""
# text = str(text).strip()
# if not text:
# return ""
# text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
# text = re.sub(r"\s*```$", "", text)
# # Strong preference: explicit final-answer style markers
# explicit_patterns = [
# r"(?is)\bfinal answer\s*:\s*(.+)$",
# r"(?is)\banswer\s*:\s*(.+)$",
# r"(?is)\bthe answer is\s*:\s*(.+)$",
# r"(?is)\bthe answer is\s+(.+)$",
# ]
# for pattern in explicit_patterns:
# match = re.search(pattern, text)
# if match:
# candidate = match.group(1).strip()
# candidate_lines = [line.strip() for line in candidate.splitlines() if line.strip()]
# if candidate_lines:
# return candidate_lines[0]
# lines = [line.strip() for line in text.splitlines() if line.strip()]
# if not lines:
# return ""
# # Prefer short non-fluff lines near the end
# for line in reversed(lines):
# normalized = normalize_basic_answer(line).lower()
# if normalized and normalized not in FLUFF_LINES and len(normalized) <= 200:
# return line
# return lines[-1]
# def normalize_basic_answer(text: str) -> str:
# if text is None:
# return ""
# text = str(text).strip()
# if not text:
# return ""
# text = re.sub(r"\s+", " ", text).strip()
# text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
# if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
# text = text[1:-1].strip()
# if text.endswith(".") and not re.fullmatch(r"\d+\.\d+", text):
# text = text[:-1].strip()
# return text
# def normalize_final_answer(question: str, text: str) -> str:
# text = normalize_basic_answer(text)
# if not text:
# return ""
# q = question.lower()
# # first name only
# if "give only the first name" in q or "first name only" in q:
# text = re.split(r"\s+", text.strip())[0]
# # last name only
# if "last names only" in q or "use their last names only" in q:
# parts = [part.strip() for part in text.split(",")]
# cleaned_parts = []
# for part in parts:
# tokens = part.split()
# cleaned_parts.append(tokens[-1] if tokens else part)
# text = ", ".join(cleaned_parts)
# # city only
# if "just give me the city name" in q or "city name without abbreviations" in q:
# text = re.split(r"[,;()\-]", text)[0].strip()
# # comma-delimited / comma separated list
# if "comma separated list" in q or "comma-delimited list" in q or "comma delimited list" in q:
# parts = [p.strip() for p in re.split(r",|\n", text) if p.strip()]
# text = ",".join(parts)
# # ascending order / alphabetical
# if "ascending order" in q:
# try:
# nums = [int(x.strip()) for x in text.split(",") if x.strip()]
# text = ",".join(str(n) for n in sorted(nums))
# except Exception:
# pass
# if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q:
# parts = [p.strip() for p in text.split(",") if p.strip()]
# if parts:
# text = ",".join(sorted(parts, key=lambda x: x.lower()))
# # two decimal places
# if "two decimal places" in q:
# number_match = re.search(r"-?\d+(?:\.\d+)?", text.replace(",", ""))
# if number_match:
# try:
# value = float(number_match.group(0))
# text = f"{value:.2f}"
# except Exception:
# pass
# # IOC code / abbreviations / codes often expected uppercase single token
# if "ioc country code" in q:
# text = text.strip().upper()
# # algebraic notation answer should be just one move token-like string
# if "algebraic notation" in q:
# text = text.strip().split()[0]
# return text
# def is_placeholder_answer(text: str) -> bool:
# normalized = normalize_basic_answer(text).lower()
# return normalized in {"", "placeholder", "n/a", "unknown"}
from __future__ import annotations
import re
# Closing pleasantries that must never be mistaken for an answer.
# Entries are lowercase because extract_final_answer compares them against
# the lowercased output of normalize_basic_answer for each candidate line.
_FLUFF_LINES = {
"i hope this helps",
"hope this helps",
"let me know if you need anything else",
"thanks",
"thank you",
}
def extract_final_answer(text: str) -> str:
    """
    Extract the most likely final answer from raw model output.

    Strategy:
    - strip a code fence wrapping the whole output
    - prefer explicit markers like 'Final answer:'; when the same marker
      occurs several times the LAST occurrence wins, since earlier ones are
      usually a passing mention or a value the model later revised
    - otherwise prefer a short meaningful line near the end, skipping polite
      sign-offs such as "Thanks!"

    Args:
        text: Raw model output. ``None`` is treated as empty; other
            non-string values are converted with ``str``.

    Returns:
        One stripped line holding the answer, or "" when there is no content.
    """
    if text is None:
        return ""
    text = str(text).strip()
    if not text:
        return ""
    text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
    text = re.sub(r"\s*```$", "", text)

    # Ordered from most to least specific. DOTALL is deliberately NOT set:
    # each capture then stops at the end of its own line, so findall can
    # report every occurrence of a marker instead of one match that swallows
    # the rest of the text.
    marker_patterns = (
        r"(?i)\bfinal answer\s*:\s*(.+)",
        r"(?i)\banswer\s*:\s*(.+)",
        r"(?i)\bthe answer is\s*:\s*(.+)",
        r"(?i)\bthe answer is\s+(.+)",
    )
    for pattern in marker_patterns:
        # Walk the occurrences from last to first and take the first one that
        # actually carries content.
        for captured in reversed(re.findall(pattern, text)):
            candidate_lines = [
                line.strip() for line in captured.splitlines() if line.strip()
            ]
            if candidate_lines:
                return candidate_lines[0]

    lines = [line.strip() for line in text.splitlines() if line.strip()]
    if not lines:
        return ""

    for line in reversed(lines):
        normalized = normalize_basic_answer(line).lower()
        # Sign-offs typically end with "!" (which normalize_basic_answer does
        # not strip), so compare against the fluff set without trailing
        # punctuation. The emptiness check still uses the full normalized
        # text so punctuation-only answers are not discarded.
        fluff_key = normalized.rstrip(" .!")
        if normalized and fluff_key not in _FLUFF_LINES and len(normalized) <= 200:
            return line

    # Every line was fluff, empty after normalization, or too long.
    return lines[-1]
def normalize_basic_answer(text: str) -> str:
    """
    Basic cleanup independent of question format.

    Steps, in order:
    - collapse every whitespace run to a single space
    - drop a leading "Final answer:" / "Answer:" label (case-insensitive)
    - remove one pair of matching outer quotes (single or double)
    - strip one trailing period, then re-check for outer quotes so that
      input such as ``"Paris".`` is unwrapped as well

    Args:
        text: Raw answer text. ``None`` is treated as empty; other non-string
            values are converted with ``str``.

    Returns:
        The cleaned answer, or "" when the input is ``None`` or blank.
    """
    if text is None:
        return ""
    text = str(text).strip()
    if not text:
        return ""

    text = re.sub(r"\s+", " ", text).strip()
    text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()

    def strip_outer_quotes(value: str) -> str:
        """Remove one pair of identical quotes wrapping the whole value."""
        if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}:
            return value[1:-1].strip()
        return value

    text = strip_outer_quotes(text)

    # A string that ends with "." can never be a bare decimal like "3.14"
    # (those end in a digit), so no numeric guard is needed here.
    if text.endswith("."):
        text = text[:-1].strip()
        # The period may have been hiding a closing quote: "Paris". -> Paris
        text = strip_outer_quotes(text)

    return text
def normalize_final_answer(*args: str) -> str:
    """
    Backward-compatible normalizer.

    Supports:
    - normalize_final_answer(text)
    - normalize_final_answer(question, text)

    The text is first cleaned with normalize_basic_answer. Formatting rules
    are then applied in a fixed order, each one triggered by a phrase in the
    lowercased question: first name only, last names only, surname, city
    name, IOC country code, algebraic notation, comma separated list,
    ascending order, alphabetical order, two decimal places, NASA award
    number. Several rules may fire for the same question.

    Returns:
        The normalized answer. "" is returned when the cleaned text is empty
        or when the call has neither one nor two positional arguments.
    """
    if len(args) == 1:
        question, text = "", args[0]
    elif len(args) == 2:
        question, text = args
    else:
        return ""

    text = normalize_basic_answer(text)
    if not text:
        return ""

    q = (question or "").lower()

    def last_tokens(value):
        """Return the final word of every non-blank comma-separated part."""
        return [part.split()[-1] for part in value.split(",") if part.strip()]

    # Remove outer labels once more, conservatively
    text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()

    # first name only
    if "give only the first name" in q or "first name only" in q:
        tokens = text.split()
        if tokens:
            text = tokens[0]

    # last names only (joined with ", " at this stage)
    if "last names only" in q or "use their last names only" in q:
        names = last_tokens(text)
        if names:
            text = ", ".join(names)

    # surname only
    if "what is the surname" in q or "surname of" in q:
        tokens = text.split()
        if tokens:
            text = tokens[-1]

    # city only: keep what precedes the first comma, semicolon, paren or hyphen
    if "city name without abbreviations" in q or "just give me the city name" in q:
        text = re.split(r"[,;()\-]", text)[0].strip()

    # IOC code
    if "ioc country code" in q:
        text = text.strip().upper()

    # algebraic notation: keep the first token only. The guard matters because
    # earlier steps (second label removal, city split) can leave text empty,
    # and indexing an empty split would raise IndexError.
    if "algebraic notation" in q:
        tokens = text.split()
        if tokens:
            text = tokens[0]

    # comma-separated list formatting
    list_markers = (
        "comma separated list",
        "comma-separated list",
        "comma delimited list",
        "comma-delimited list",
        "comma separated",
    )
    if any(marker in q for marker in list_markers):
        parts = [p.strip() for p in re.split(r",|\n", text) if p.strip()]
        text = ",".join(parts)

    # ascending order
    if "ascending order" in q:
        try:
            nums = sorted(int(x.strip()) for x in text.split(",") if x.strip())
        except ValueError:
            # At least one item is not an integer: leave the text untouched.
            pass
        else:
            text = ",".join(str(n) for n in nums)

    # alphabetical order
    if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q:
        parts = [p.strip() for p in text.split(",") if p.strip()]
        if parts:
            text = ",".join(sorted(parts, key=str.lower))

    # two decimal places: format the first number found, ignoring commas
    if "two decimal places" in q:
        match = re.search(r"-?\d+(?:\.\d+)?", text.replace(",", ""))
        if match:
            try:
                text = f"{float(match.group(0)):.2f}"
            except ValueError:
                pass

    if "nasa award number" in q:
        text = text.replace("NASA award number", "").strip()

    if "city name without abbreviations" in q:
        text = text.replace("St. Petersburg", "Saint Petersburg").strip()

    # Final pass for this specific phrasing: re-join the last names with a
    # bare "," (no space), overriding the ", " join done above.
    if "use their last names only" in q:
        names = last_tokens(text)
        if names:
            text = ",".join(names)

    return text.strip()
def is_placeholder_answer(text: str) -> bool:
    """
    Report whether *text* is a placeholder/fallback output.

    ``None`` counts as a placeholder, and so does any text whose normalized,
    lowercased form is empty, "placeholder", "n/a" or "unknown".
    """
    if text is not None:
        cleaned = normalize_basic_answer(text).lower()
        return cleaned in ("", "placeholder", "n/a", "unknown")
    return True