{ "quantization": "INT4", "compute_units": "cpuAndGPU", "hidden_size": 2048, "vocab_size": 65536, "max_seq_len": 512, "architecture": "lfm25", "layer_types": [ "conv", "conv", "full_attention", "conv", "conv", "full_attention", "conv", "conv", "full_attention", "conv", "full_attention", "conv", "full_attention", "conv", "full_attention", "conv" ] }