Text Generation
Transformers
English
custom
tokenizer
symbolic-ai
mathematics
llm
reasoning
ast
compiler
nlp
deep-learning
machine-learning
mathematical-reasoning
symbolic-reasoning
tokenization
parser
artificial-intelligence
Eval Results (legacy)
Instructions to use SurweeshSP/mathtok with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use SurweeshSP/mathtok with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="SurweeshSP/mathtok")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("SurweeshSP/mathtok", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use SurweeshSP/mathtok with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "SurweeshSP/mathtok"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SurweeshSP/mathtok",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/SurweeshSP/mathtok
- SGLang
How to use SurweeshSP/mathtok with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "SurweeshSP/mathtok" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SurweeshSP/mathtok",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "SurweeshSP/mathtok" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SurweeshSP/mathtok",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use SurweeshSP/mathtok with Docker Model Runner:
docker model run hf.co/SurweeshSP/mathtok
| """ | |
| Integration tests for the end-to-end MathTok Pipeline. | |
| """ | |
| import pytest | |
| from mathtok.pipeline import MathTokPipeline, TokenizedOutput | |
@pytest.fixture
def pipeline():
    """Provide a ``MathTokPipeline`` built with ``include_metadata=True``.

    Every test method in this module takes a ``pipeline`` argument, which
    pytest resolves by fixture name; without the ``@pytest.fixture``
    decorator collection fails with "fixture 'pipeline' not found".

    Returns:
        A fresh ``MathTokPipeline`` instance for each requesting test.
    """
    return MathTokPipeline(include_metadata=True)
class TestBasicEncode:
    """Smoke checks on the basic shape of what ``pipeline.encode`` returns."""

    def test_returns_output(self, pipeline):
        """``encode`` hands back a ``TokenizedOutput`` instance."""
        encoded = pipeline.encode("x^2 + 1")
        assert isinstance(encoded, TokenizedOutput)

    def test_tokens_nonempty(self, pipeline):
        """At least one token is produced for a function call expression."""
        encoded = pipeline.encode("sin(x)")
        token_count = len(encoded.tokens)
        assert token_count > 0

    def test_input_ids_match_tokens(self, pipeline):
        """``tokens`` and ``input_ids`` have the same length."""
        encoded = pipeline.encode("x^2 + 2*x + 1")
        assert len(encoded.input_ids) == len(encoded.tokens)

    def test_ids_are_integers(self, pipeline):
        """Every entry of ``input_ids`` is an ``int``."""
        encoded = pipeline.encode("x + 1")
        for token_id in encoded.input_ids:
            assert isinstance(token_id, int)

    def test_no_negative_ids(self, pipeline):
        """No entry of ``input_ids`` is below zero."""
        encoded = pipeline.encode("x + 1")
        # NOTE(review): the earlier comment here said "UNK=1 is minimum
        # valid", yet only non-negativity is asserted -- confirm which lower
        # bound is actually intended.
        for token_id in encoded.input_ids:
            assert token_id >= 0
class TestMathSpans:
    """Checks on math-span markers, the s-expression and canon results."""

    def test_math_start_end_tokens(self, pipeline):
        """Both math delimiter tokens appear in the token list."""
        encoded = pipeline.encode("x^2")
        for marker in ("[MATH_START]", "[MATH_END]"):
            assert marker in encoded.tokens

    def test_sexp_nonempty(self, pipeline):
        """``sexp`` is non-empty for a simple polynomial."""
        encoded = pipeline.encode("x^2 + 1")
        sexp_length = len(encoded.sexp)
        assert sexp_length > 0

    def test_sexp_contains_op(self, pipeline):
        """A power expression puts ``OP_POW`` into ``sexp``."""
        sexp = pipeline.encode("x^2").sexp
        assert "OP_POW" in sexp

    def test_canon_results(self, pipeline):
        """At least one canon result exists and the first reports success."""
        # Plain ASCII input is used here because it is expected to parse.
        encoded = pipeline.encode("x^2 + 1")
        assert len(encoded.canon_results) >= 1
        first_result = encoded.canon_results[0]
        assert first_result.success
class TestMixedInput:
    """Checks on inputs that mix natural-language text with math."""

    def test_mixed_latex(self, pipeline):
        """Text with an inline ``$...$`` span produces tokens."""
        encoded = pipeline.encode("The result is $x^2 + 1$.")
        token_count = len(encoded.tokens)
        assert token_count > 0

    def test_mixed_ascii(self, pipeline):
        """Text with undelimited ASCII math produces tokens."""
        encoded = pipeline.encode("Compute sin(x) for x = pi.")
        token_count = len(encoded.tokens)
        assert token_count > 0

    def test_multiple_math_spans(self, pipeline):
        """Several ``$...$`` spans yield at least one OP_/VAR_ token."""
        encoded = pipeline.encode(
            "If $a > 0$ and $b < 0$ then $a + b$ can be zero."
        )
        # Only require that some operator or variable token came out.
        math_prefixes = ("OP_", "VAR_")
        math_tokens = [
            token for token in encoded.tokens if token.startswith(math_prefixes)
        ]
        assert len(math_tokens) > 0
class TestMetadata:
    """Checks on the per-token metadata attached to encoded output."""

    def test_metadata_present(self, pipeline):
        """``metadata`` is non-empty for a simple expression."""
        encoded = pipeline.encode("x + 1")
        entry_count = len(encoded.metadata)
        assert entry_count > 0

    def test_metadata_positions_sequential(self, pipeline):
        """Metadata ``position`` values are already in sorted order."""
        encoded = pipeline.encode("x^2 + 1")
        observed = [entry.position for entry in encoded.metadata]
        assert sorted(observed) == observed

    def test_metadata_categories(self, pipeline):
        """At least one of the expected token categories is reported."""
        encoded = pipeline.encode("x + 1")
        seen = {entry.token_category for entry in encoded.metadata}
        expected_any = ("operator", "variable", "constant")
        assert any(category in seen for category in expected_any)

    def test_tree_position_keys(self, pipeline):
        """Entries with ``node_id >= 0`` exist and carry string keys."""
        encoded = pipeline.encode("x + 1")
        tree_keys = [
            entry.tree_position_key
            for entry in encoded.metadata
            if entry.node_id >= 0
        ]
        assert len(tree_keys) > 0
        for key in tree_keys:
            assert isinstance(key, str)
class TestEncodeMathOnly:
    """Checks on ``encode_math_only`` and ``encode_batch``."""

    def test_encode_math_only(self, pipeline):
        """Tokens are produced and include ``OP_ADD`` or ``OP_POW``."""
        encoded = pipeline.encode_math_only("x^2 + 2*x + 1")
        assert len(encoded.tokens) > 0
        expected_ops = ("OP_ADD", "OP_POW")
        assert any(op in encoded.tokens for op in expected_ops)

    def test_encode_batch(self, pipeline):
        """Three inputs give three outputs, each with at least one token."""
        expressions = ["x + 1", "sin(x)", "x^2"]
        batch = pipeline.encode_batch(expressions)
        assert len(batch) == 3
        for encoded in batch:
            assert len(encoded.tokens) > 0
class TestHFTokenizer:
    """Checks on the tokenizer object returned by ``get_hf_tokenizer``."""

    def test_hf_tokenizer_callable(self, pipeline):
        """Calling the tokenizer returns ``input_ids`` with one entry."""
        tokenizer = pipeline.get_hf_tokenizer()
        encoding = tokenizer("x^2 + 1")
        assert "input_ids" in encoding
        batch_ids = encoding["input_ids"]
        assert len(batch_ids) == 1

    def test_hf_tokenizer_encode(self, pipeline):
        """``encode`` returns a non-empty ``list``."""
        tokenizer = pipeline.get_hf_tokenizer()
        token_ids = tokenizer.encode("sin(x)")
        assert isinstance(token_ids, list)
        assert len(token_ids) > 0

    def test_hf_vocab_size(self, pipeline):
        """``len()`` of the tokenizer exceeds 100."""
        tokenizer = pipeline.get_hf_tokenizer()
        vocab_size = len(tokenizer)
        assert vocab_size > 100