Text Generation
Transformers
English
custom
tokenizer
symbolic-ai
mathematics
llm
reasoning
ast
compiler
nlp
deep-learning
machine-learning
mathematical-reasoning
symbolic-reasoning
tokenization
parser
artificial-intelligence
Eval Results (legacy)
Instructions to use SurweeshSP/mathtok with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use SurweeshSP/mathtok with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="SurweeshSP/mathtok")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("SurweeshSP/mathtok", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use SurweeshSP/mathtok with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "SurweeshSP/mathtok"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "SurweeshSP/mathtok",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
Use Docker
docker model run hf.co/SurweeshSP/mathtok
- SGLang
How to use SurweeshSP/mathtok with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "SurweeshSP/mathtok" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "SurweeshSP/mathtok",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "SurweeshSP/mathtok" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "SurweeshSP/mathtok",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
- Docker Model Runner
How to use SurweeshSP/mathtok with Docker Model Runner:
docker model run hf.co/SurweeshSP/mathtok
File size: 2,891 Bytes
edede4c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | """
Tests for the Hybrid Lexer (Layer 2).
"""
import pytest
from mathtok.lexer import HybridLexer, LexSpan, SpanType
@pytest.fixture
def lex():
    """Build the lexer instance used by the tests in this module.

    Returns:
        A ``HybridLexer`` constructed with ``ascii_math_detection=True``
        and ``min_math_len=3``.
    """
    lexer = HybridLexer(ascii_math_detection=True, min_math_len=3)
    return lexer
class TestLatexDetection:
    """Tests for inputs that mark math with LaTeX delimiters."""

    @staticmethod
    def _math_spans(spans):
        """Return the spans whose ``span_type`` is ``SpanType.MATH``."""
        return [span for span in spans if span.span_type is SpanType.MATH]

    def test_inline_dollar(self, lex):
        """A ``$...$`` fragment inside a sentence yields MATH and TEXT spans."""
        result = lex.lex("Let $x^2 + 1$ be given.")
        # Spans whose content is empty or whitespace-only are ignored here.
        seen_types = [
            span.span_type for span in result if span.content.strip()
        ]
        assert SpanType.MATH in seen_types
        assert SpanType.TEXT in seen_types

    def test_display_dollar(self, lex):
        """A ``$$...$$`` input yields at least one MATH span mentioning x."""
        found = self._math_spans(lex.lex("$$x^2 + y^2 = 1$$"))
        assert len(found) >= 1
        first_content = found[0].content
        assert "x^2" in first_content or "x" in first_content

    def test_inline_paren(self, lex):
        """A ``\\(...\\)`` fragment yields exactly one MATH span."""
        found = self._math_spans(lex.lex("We have \\(a + b\\) here."))
        assert len(found) == 1

    def test_display_bracket(self, lex):
        """A ``\\[...\\]`` fragment yields exactly one MATH span."""
        found = self._math_spans(lex.lex("Result: \\[x = \\frac{-b}{2a}\\]"))
        assert len(found) == 1

    def test_multiple_math_spans(self, lex):
        """Three separate ``$...$`` fragments yield three MATH spans."""
        sentence = "If $a > 0$ and $b < 0$, then $a + b$ may be zero."
        found = self._math_spans(lex.lex(sentence))
        assert len(found) == 3

    def test_pure_text(self, lex):
        """A plain English sentence yields no MATH spans."""
        sentence = "This is plain English text with no math at all."
        found = self._math_spans(lex.lex(sentence))
        assert not found
class TestAsciiDetection:
    """Tests for math written in plain ASCII, without LaTeX delimiters."""

    def test_function_call(self, lex):
        """``sin(x)`` in a sentence shows up inside some MATH span."""
        result = lex.lex("Compute sin(x) for x = pi.")
        math_contents = [
            span.content for span in result if span.span_type is SpanType.MATH
        ]
        assert any("sin" in content for content in math_contents)

    def test_exponentiation(self, lex):
        """``x^2`` in a sentence produces at least one MATH span."""
        result = lex.lex("The value of x^2 is always positive.")
        found = list(
            filter(lambda span: span.span_type is SpanType.MATH, result)
        )
        assert found

    def test_equation(self, lex):
        """An ASCII equation produces at least one MATH span."""
        result = lex.lex("Solve x^2 + 2*x + 1 = 0.")
        math_count = sum(
            1 for span in result if span.span_type is SpanType.MATH
        )
        assert math_count >= 1
class TestEdgeCases:
    """Tests for boundary inputs and the ``is_math_only`` helper."""

    def test_empty_string(self, lex):
        """Lexing the empty string returns an empty list."""
        assert lex.lex("") == []

    def test_only_whitespace(self, lex):
        """Every span produced for whitespace-only input is a TEXT span."""
        for span in lex.lex(" "):
            assert span.span_type is SpanType.TEXT

    def test_is_math_only_true(self, lex):
        """``is_math_only`` is truthy for a single ``$...$`` expression."""
        verdict = lex.is_math_only("$x^2 + 1$")
        assert verdict

    def test_adjacent_spans_merged(self, lex):
        """Input with no math yields at most two TEXT spans."""
        result = lex.lex("hello world, no math here at all.")
        # All-text input should be merged into a minimal number of spans.
        text_count = sum(
            1 for span in result if span.span_type is SpanType.TEXT
        )
        assert text_count <= 2
|