{ "model_type": "sprog", "architecture": "seq2seq-transformer", "framework": "mlx", "d": 304, "n_heads": 4, "n_layers": 4, "ff": 608, "max_src": 220, "max_tgt": 64, "src_vocab_size": 6000, "tgt_vocab_size": 79, "n_params": 9370000, "task": "gsm8k-math-word-problems", "inference": "self-consistency (96 samples) + symbolic verifier", "license": "mit" }