Final_Assignment_Template

Sleeping

App Files Files Community

Final_Assignment_Template / utils.py

abhi1294

Fix prompts and utils

9b6ba86 9 days ago

raw

history blame contribute delete

10.2 kB

	# from __future__ import annotations

	# import re


	# FLUFF_LINES = {
	# "i hope this helps",
	# "hope this helps",
	# "let me know if you need anything else",
	# "thanks",
	# }


	# def extract_final_answer(text: str) -> str:
	# if text is None:
	# return ""

	# text = str(text).strip()
	# if not text:
	# return ""

	# text = re.sub(r"^```[a-zA-Z0-9_-]\s", "", text)
	# text = re.sub(r"\s*```$", "", text)

	# # Strong preference: explicit final-answer style markers
	# explicit_patterns = [
	# r"(?is)\bfinal answer\s:\s(.+)$",
	# r"(?is)\banswer\s:\s(.+)$",
	# r"(?is)\bthe answer is\s:\s(.+)$",
	# r"(?is)\bthe answer is\s+(.+)$",
	# ]
	# for pattern in explicit_patterns:
	# match = re.search(pattern, text)
	# if match:
	# candidate = match.group(1).strip()
	# candidate_lines = [line.strip() for line in candidate.splitlines() if line.strip()]
	# if candidate_lines:
	# return candidate_lines[0]

	# lines = [line.strip() for line in text.splitlines() if line.strip()]
	# if not lines:
	# return ""

	# # Prefer short non-fluff lines near the end
	# for line in reversed(lines):
	# normalized = normalize_basic_answer(line).lower()
	# if normalized and normalized not in FLUFF_LINES and len(normalized) <= 200:
	# return line

	# return lines[-1]


	# def normalize_basic_answer(text: str) -> str:
	# if text is None:
	# return ""

	# text = str(text).strip()
	# if not text:
	# return ""

	# text = re.sub(r"\s+", " ", text).strip()
	# text = re.sub(r"(?i)^(final answer\|answer)\s:\s", "", text).strip()

	# if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
	# text = text[1:-1].strip()

	# if text.endswith(".") and not re.fullmatch(r"\d+\.\d+", text):
	# text = text[:-1].strip()

	# return text


	# def normalize_final_answer(question: str, text: str) -> str:
	# text = normalize_basic_answer(text)
	# if not text:
	# return ""

	# q = question.lower()

	# # first name only
	# if "give only the first name" in q or "first name only" in q:
	# text = re.split(r"\s+", text.strip())[0]

	# # last name only
	# if "last names only" in q or "use their last names only" in q:
	# parts = [part.strip() for part in text.split(",")]
	# cleaned_parts = []
	# for part in parts:
	# tokens = part.split()
	# cleaned_parts.append(tokens[-1] if tokens else part)
	# text = ", ".join(cleaned_parts)

	# # city only
	# if "just give me the city name" in q or "city name without abbreviations" in q:
	# text = re.split(r"[,;()\-]", text)[0].strip()

	# # comma-delimited / comma separated list
	# if "comma separated list" in q or "comma-delimited list" in q or "comma delimited list" in q:
	# parts = [p.strip() for p in re.split(r",\|\n", text) if p.strip()]
	# text = ",".join(parts)

	# # ascending order / alphabetical
	# if "ascending order" in q:
	# try:
	# nums = [int(x.strip()) for x in text.split(",") if x.strip()]
	# text = ",".join(str(n) for n in sorted(nums))
	# except Exception:
	# pass

	# if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q:
	# parts = [p.strip() for p in text.split(",") if p.strip()]
	# if parts:
	# text = ",".join(sorted(parts, key=lambda x: x.lower()))

	# # two decimal places
	# if "two decimal places" in q:
	# number_match = re.search(r"-?\d+(?:\.\d+)?", text.replace(",", ""))
	# if number_match:
	# try:
	# value = float(number_match.group(0))
	# text = f"{value:.2f}"
	# except Exception:
	# pass

	# # IOC code / abbreviations / codes often expected uppercase single token
	# if "ioc country code" in q:
	# text = text.strip().upper()

	# # algebraic notation answer should be just one move token-like string
	# if "algebraic notation" in q:
	# text = text.strip().split()[0]

	# return text


	# def is_placeholder_answer(text: str) -> bool:
	# normalized = normalize_basic_answer(text).lower()
	# return normalized in {"", "placeholder", "n/a", "unknown"}


	from __future__ import annotations

	import re


	_FLUFF_LINES = {
	"i hope this helps",
	"hope this helps",
	"let me know if you need anything else",
	"thanks",
	"thank you",
	}


	def extract_final_answer(text: str) -> str:
	"""
	Extract the most likely final answer from raw model output.

	Strategy:
	- prefer explicit markers like 'Final answer:'
	- strip code fences
	- if multiline, prefer a short meaningful line near the end
	"""
	if text is None:
	return ""

	text = str(text).strip()
	if not text:
	return ""

	text = re.sub(r"^```[a-zA-Z0-9_-]\s", "", text)
	text = re.sub(r"\s*```$", "", text)

	explicit_patterns = [
	r"(?is)\bfinal answer\s:\s(.+)$",
	r"(?is)\banswer\s:\s(.+)$",
	r"(?is)\bthe answer is\s:\s(.+)$",
	r"(?is)\bthe answer is\s+(.+)$",
	]

	for pattern in explicit_patterns:
	match = re.search(pattern, text)
	if match:
	candidate = match.group(1).strip()
	candidate_lines = [line.strip() for line in candidate.splitlines() if line.strip()]
	if candidate_lines:
	return candidate_lines[0]

	lines = [line.strip() for line in text.splitlines() if line.strip()]
	if not lines:
	return ""

	for line in reversed(lines):
	normalized = normalize_basic_answer(line).lower()
	if normalized and normalized not in _FLUFF_LINES and len(normalized) <= 200:
	return line

	return lines[-1]


	def normalize_basic_answer(text: str) -> str:
	"""
	Basic cleanup independent of question format.
	"""
	if text is None:
	return ""

	text = str(text).strip()
	if not text:
	return ""

	text = re.sub(r"\s+", " ", text).strip()
	text = re.sub(r"(?i)^(final answer\|answer)\s:\s", "", text).strip()

	if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
	text = text[1:-1].strip()

	if text.endswith(".") and not re.fullmatch(r"-?\d+\.\d+", text):
	text = text[:-1].strip()

	return text


	def normalize_final_answer(*args: str) -> str:
	"""
	Backward-compatible normalizer.

	Supports:
	- normalize_final_answer(text)
	- normalize_final_answer(question, text)
	"""
	if len(args) == 1:
	question = ""
	text = args[0]
	elif len(args) == 2:
	question, text = args
	else:
	return ""

	text = normalize_basic_answer(text)
	if not text:
	return ""

	q = (question or "").lower()

	# Remove outer labels once more, conservatively
	text = re.sub(r"(?i)^(final answer\|answer)\s:\s", "", text).strip()

	# first name only
	if "give only the first name" in q or "first name only" in q:
	tokens = text.split()
	if tokens:
	text = tokens[0]

	# last name only
	if "last names only" in q or "use their last names only" in q:
	parts = [part.strip() for part in text.split(",") if part.strip()]
	if parts:
	cleaned_parts: list[str] = []
	for part in parts:
	tokens = part.split()
	cleaned_parts.append(tokens[-1] if tokens else part)
	text = ", ".join(cleaned_parts)

	# surname only
	if "what is the surname" in q or "surname of" in q:
	tokens = text.split()
	if tokens:
	text = tokens[-1]

	# city only
	if "city name without abbreviations" in q or "just give me the city name" in q:
	text = re.split(r"[,;()\-]", text)[0].strip()

	# IOC code
	if "ioc country code" in q:
	text = text.strip().upper()

	# algebraic notation
	if "algebraic notation" in q:
	text = text.strip().split()[0]

	# comma-separated list formatting
	if (
	"comma separated list" in q
	or "comma-separated list" in q
	or "comma delimited list" in q
	or "comma-delimited list" in q
	or "comma separated" in q
	):
	parts = [p.strip() for p in re.split(r",\|\n", text) if p.strip()]
	text = ",".join(parts)

	# ascending order
	if "ascending order" in q:
	try:
	nums = [int(x.strip()) for x in text.split(",") if x.strip()]
	text = ",".join(str(n) for n in sorted(nums))
	except Exception:
	pass

	# alphabetical order
	if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q:
	parts = [p.strip() for p in text.split(",") if p.strip()]
	if parts:
	text = ",".join(sorted(parts, key=lambda x: x.lower()))

	# two decimal places
	if "two decimal places" in q:
	compact = text.replace(",", "")
	match = re.search(r"-?\d+(?:\.\d+)?", compact)
	if match:
	try:
	value = float(match.group(0))
	text = f"{value:.2f}"
	except Exception:
	pass
	if "nasa award number" in q:
	text = text.replace("NASA award number", "").strip()

	if "city name without abbreviations" in q:
	text = text.replace("St. Petersburg", "Saint Petersburg").strip()

	if "use their last names only" in q:
	parts = [p.strip() for p in text.split(",") if p.strip()]
	last_names = []
	for part in parts:
	tokens = part.split()
	if tokens:
	last_names.append(tokens[-1])
	if last_names:
	text = ",".join(last_names)

	return text.strip()


	def is_placeholder_answer(text: str) -> bool:
	"""
	Detect placeholder/fallback outputs.
	"""
	if text is None:
	return True

	normalized = normalize_basic_answer(text).lower()
	return normalized in {
	"",
	"placeholder",
	"n/a",
	"unknown",
	}