Text Generation
Transformers
GGUF
PyTorch
English
gpt2
gpt2-small
117M
conversational
grpo
vae
kv-cache
distillation
reinforcement-learning
openclaw
fallback-agent
soul-md
agent-framework
tool-use
task-automation
dpo
tool-masking
uncertainty-estimation
rag
semantic-cache
quantization
pruning
arxiv:2402.03300
{
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "vocab_size": 50257,
  "n_embd": 768,
  "n_layer": 12,
  "n_head": 12,
  "tool_masking": true,
  "schema_first": true,
  "schema_format": "json",
  "mask_ratio": 0.8,
  "dpo": false,
  "dpo_beta": 0.1,
  "uncertainty_threshold": 0.7,
  "rag": false,
  "rag_topk": 3,
  "rag_chunk_size": 256,
  "semantic_cache_size": 128,
  "semantic_cache_threshold": 0.85,
  "quantization_bits": 4,
  "quantization_backend": "autogptq",
  "pruning_ratio": 0,
  "flash_attention": false,
  "fused_kernels": false
}