SurweeshSP
/

mathtok

Model card Files Files and versions

mathtok / tests /test_lexer.py

SurweeshSP's picture

Initial clean MathTok release

edede4c 5 days ago

history blame contribute delete

2.89 kB

	"""
	Tests for the Hybrid Lexer (Layer 2).
	"""

	import pytest
	from mathtok.lexer import HybridLexer, LexSpan, SpanType


	@pytest.fixture
	def lex():
	return HybridLexer(ascii_math_detection=True, min_math_len=3)


	class TestLatexDetection:
	def test_inline_dollar(self, lex):
	spans = lex.lex("Let $x^2 + 1$ be given.")
	types = [s.span_type for s in spans if s.content.strip()]
	assert SpanType.MATH in types
	assert SpanType.TEXT in types

	def test_display_dollar(self, lex):
	spans = lex.lex("$$x^2 + y^2 = 1$$")
	math_spans = [s for s in spans if s.span_type is SpanType.MATH]
	assert len(math_spans) >= 1
	assert "x^2" in math_spans[0].content or "x" in math_spans[0].content

	def test_inline_paren(self, lex):
	spans = lex.lex("We have \\(a + b\\) here.")
	math_spans = [s for s in spans if s.span_type is SpanType.MATH]
	assert len(math_spans) == 1

	def test_display_bracket(self, lex):
	spans = lex.lex("Result: \\[x = \\frac{-b}{2a}\\]")
	math_spans = [s for s in spans if s.span_type is SpanType.MATH]
	assert len(math_spans) == 1

	def test_multiple_math_spans(self, lex):
	spans = lex.lex("If $a > 0$ and $b < 0$, then $a + b$ may be zero.")
	math_spans = [s for s in spans if s.span_type is SpanType.MATH]
	assert len(math_spans) == 3

	def test_pure_text(self, lex):
	spans = lex.lex("This is plain English text with no math at all.")
	math_spans = [s for s in spans if s.span_type is SpanType.MATH]
	assert len(math_spans) == 0


	class TestAsciiDetection:
	def test_function_call(self, lex):
	spans = lex.lex("Compute sin(x) for x = pi.")
	math_spans = [s for s in spans if s.span_type is SpanType.MATH]
	assert any("sin" in s.content for s in math_spans)

	def test_exponentiation(self, lex):
	spans = lex.lex("The value of x^2 is always positive.")
	math_spans = [s for s in spans if s.span_type is SpanType.MATH]
	assert len(math_spans) >= 1

	def test_equation(self, lex):
	spans = lex.lex("Solve x^2 + 2*x + 1 = 0.")
	math_spans = [s for s in spans if s.span_type is SpanType.MATH]
	assert len(math_spans) >= 1


	class TestEdgeCases:
	def test_empty_string(self, lex):
	spans = lex.lex("")
	assert spans == []

	def test_only_whitespace(self, lex):
	spans = lex.lex(" ")
	assert all(s.span_type is SpanType.TEXT for s in spans)

	def test_is_math_only_true(self, lex):
	assert lex.is_math_only("$x^2 + 1$")

	def test_adjacent_spans_merged(self, lex):
	spans = lex.lex("hello world, no math here at all.")
	# All-text should be merged into a minimal number of spans
	text_spans = [s for s in spans if s.span_type is SpanType.TEXT]
	assert len(text_spans) <= 2