{ "architectures": [ "Gemma2ForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "attn_logit_softcapping": 50.0, "bos_token_id": 2, "cache_implementation": "hybrid", "eos_token_id": 1, "final_logit_softcapping": 30.0, "generation_config": { "_from_model_config": true, "assistant_confidence_threshold": 0.4, "assistant_early_exit": null, "assistant_lookbehind": 10, "bad_words_ids": null, "begin_suppress_tokens": null, "bos_token_id": 2, "cache_config": null, "cache_implementation": "hybrid", "constraints": null, "decoder_start_token_id": null, "disable_compile": false, "diversity_penalty": 0.0, "do_sample": false, "dola_layers": null, "early_stopping": false, "encoder_no_repeat_ngram_size": 0, "encoder_repetition_penalty": 1.0, "eos_token_id": 1, "epsilon_cutoff": 0.0, "eta_cutoff": 0.0, "exponential_decay_length_penalty": null, "force_words_ids": null, "forced_bos_token_id": null, "forced_decoder_ids": null, "forced_eos_token_id": null, "generation_kwargs": {}, "guidance_scale": null, "is_assistant": false, "length_penalty": 1.0, "low_memory": null, "max_length": 20, "max_matching_ngram_size": null, "max_new_tokens": null, "max_time": null, "min_length": 0, "min_new_tokens": null, "min_p": null, "no_repeat_ngram_size": 0, "num_assistant_tokens": 20, "num_assistant_tokens_schedule": "constant", "num_beam_groups": 1, "num_beams": 1, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_logits": null, "output_scores": false, "pad_token_id": 0, "penalty_alpha": null, "prompt_lookup_num_tokens": null, "remove_invalid_values": false, "renormalize_logits": false, "repetition_penalty": 1.0, "return_dict_in_generate": false, "return_legacy_cache": null, "sequence_bias": null, "stop_strings": null, "suppress_tokens": null, "target_lookbehind": 10, "temperature": null, "token_healing": false, "top_k": 50, "top_p": null, "transformers_version": "4.50.1", "typical_p": 1.0, "use_cache": true, "watermarking_config": null }, "head_dim": 256, "hidden_act": "gelu_pytorch_tanh", "hidden_activation": "gelu_pytorch_tanh", "hidden_size": 3584, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "gemma2", "num_attention_heads": 16, "num_hidden_layers": 42, "num_key_value_heads": 8, "pad_token_id": 0, "query_pre_attn_scalar": 256, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "sliding_window": 4096, "sliding_window_size": 4096, "torch_dtype": "bfloat16", "transformers_version": "4.50.1", "use_cache": true, "vocab_size": 256000 }