Spaces:

Varshithdharmajv
/

mvm2-math-verification

Running

Antigravity Agent

fix(ocr): apply universal CJK filter and early input sanitization

be2c8ad 14 days ago

1.38 kB

	import re

	# Matches any single character from the CJK (Chinese, Japanese, Korean)
	# blocks listed below, including their punctuation. The class is assembled
	# from labelled ranges; individual punctuation marks (e.g. U+3001, U+3002,
	# U+FF0C) are not listed separately because the last two ranges already
	# contain them.
	# NOTE: the U+FF00-U+FFEF range also covers fullwidth digits, Latin letters
	# and operators, so those are matched as well.
	CJK_PATTERN = re.compile(
	    '['
	    r'\u4e00-\u9fff'  # CJK Unified Ideographs
	    r'\u3040-\u30ff'  # Hiragana and Katakana
	    r'\uac00-\ud7af'  # Hangul Syllables
	    r'\u3000-\u303f'  # CJK Symbols and Punctuation
	    r'\uff00-\uffef'  # Halfwidth and Fullwidth Forms
	    ']'
	)

	def clean_latex(text: str) -> str:
	    """Clean a raw OCR/LLM math string into a plain, normalized expression.

	    Steps, applied in order:
	      1. Remove every character matched by ``CJK_PATTERN``.
	      2. Remove the LaTeX wrapper characters ``\\``, ``{``, ``}`` and ``$``.
	         Only the characters are dropped, so a command such as ``\\frac``
	         is left behind as the bare word ``frac``.
	      3. Remove the instruction words prove, solve, calculate, find,
	         simplify, evaluate and where (whole words, case-insensitive).
	      4. Insert ``*`` between a digit and a following letter or ``(``, and
	         between a letter or ``)`` and a following digit (``2x`` -> ``2*x``,
	         ``x2`` -> ``x*2``).
	      5. Collapse whitespace runs to a single space and strip the ends.

	    Args:
	        text: Raw input string; may be empty or ``None``.

	    Returns:
	        The cleaned string, or ``""`` when ``text`` is falsy.

	    Example:
	        ``clean_latex("Solve $2x + 3(y)$")`` returns ``"2*x + 3*(y)"``.
	    """
	    if not text:
	        return ""
	    # Remove CJK characters.
	    text = CJK_PATTERN.sub('', text)
	    # Remove LaTeX wrapper characters.
	    text = text.replace('\\', '').replace('{', '').replace('}', '').replace('$', '')
	    # Remove common instruction words. The alternation must use a bare `|`:
	    # an escaped `\|` is a literal pipe and would make the group match only
	    # the literal text "prove|solve|...", i.e. never strip anything.
	    text = re.sub(
	        r'\b(?:prove|solve|calculate|find|simplify|evaluate|where)\b',
	        '',
	        text,
	        flags=re.IGNORECASE,
	    )
	    # Expand implicit multiplication: 2x -> 2*x, 3(y) -> 3*(y), x2 -> x*2.
	    text = re.sub(r'(\d)([a-zA-Z\(])', r'\1*\2', text)
	    text = re.sub(r'([a-zA-Z\)])(\d)', r'\1*\2', text)
	    # Normalize whitespace and strip (also tidies gaps left by removals).
	    return re.sub(r'\s+', ' ', text).strip()

	def normalize_math_string(s: str) -> str:
	    """Normalize a mathematical answer string into a canonical form for comparison.

	    All whitespace is removed and the text is lower-cased. Then:

	    * If the text contains commas, each comma-separated part is normalized
	      on its own and the parts are re-joined in sorted (lexicographic)
	      order, so the order of the items does not matter.
	    * If the text parses as a float, it is rounded to 6 decimal places and
	      rendered as an integer when the rounded value is whole (``"2.0"`` and
	      ``"2.0000001"`` both give ``"2"``), otherwise as the rounded float.
	    * Anything else (symbolic text, ``inf``, ``nan``) is returned as the
	      whitespace-free, lower-cased text.

	    Args:
	        s: The string to normalize; may be empty or ``None``.

	    Returns:
	        The canonical string, or ``""`` when ``s`` is falsy.
	    """
	    if not s:
	        return ""
	    # Drop every kind of whitespace (spaces, tabs, newlines), not just " ".
	    s = "".join(s.split()).lower()
	    if "," in s:
	        # Parts contain no commas, so this recursion is only one level deep.
	        parts = [normalize_math_string(p) for p in s.split(",")]
	        return ",".join(sorted(parts))
	    try:
	        # Round before the integrality check so values that differ from a
	        # whole number only beyond 6 decimals collapse to that integer.
	        value = round(float(s), 6)
	        if value == int(value):
	            return str(int(value))
	    except (ValueError, OverflowError):
	        # ValueError: not a number (or nan); OverflowError: infinity.
	        return s
	    return str(value)