File size: 4,328 Bytes
887c99e 7149205 887c99e 7149205 887c99e 7149205 887c99e 7149205 887c99e 7149205 887c99e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
{
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bias_removal": true,
"bos_token_id": 128000,
"censorship": false,
"eos_token_id": [
128001,
128008,
128009
],
"fused_tensor_size": 494674944,
"generation_tuning": {
"max_length": 20,
"max_new_tokens": 100,
"min_length": 0,
"min_new_tokens": null,
"early_stopping": false,
"max_time": null,
"stop_strings": null,
"do_sample": true,
"num_beams": 1,
"num_beam_groups": 1,
"penalty_alpha": null,
"dola_layers": null,
"use_cache": true,
"cache_implementation": null,
"cache_config": null,
"return_legacy_cache": null,
"prefill_chunk_size": null,
"temperature": 0.7,
"top_k": 50,
"top_p": 0.9,
"min_p": null,
"typical_p": 1.0,
"epsilon_cutoff": 0.0,
"eta_cutoff": 0.0,
"diversity_penalty": 0.0,
"repetition_penalty": 1.2,
"encoder_repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 3,
"bad_words_ids": null,
"force_words_ids": null,
"renormalize_logits": false,
"constraints": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"forced_decoder_ids": null,
"sequence_bias": null,
"token_healing": false,
"guidance_scale": null,
"low_memory": null,
"watermarking_config": null,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"output_logits": null,
"return_dict_in_generate": false,
"pad_token_id": null,
"bos_token_id": null,
"eos_token_id": null,
"encoder_no_repeat_ngram_size": 0,
"decoder_start_token_id": null,
"is_assistant": false,
"num_assistant_tokens": 20,
"num_assistant_tokens_schedule": "constant",
"assistant_confidence_threshold": 0.4,
"prompt_lookup_num_tokens": null,
"max_matching_ngram_size": null,
"assistant_early_exit": null,
"assistant_lookbehind": 10,
"target_lookbehind": 10,
"disable_compile": false,
"generation_kwargs": {},
"_from_model_config": false,
"transformers_version": "4.51.3"
},
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 3072,
"initializer_range": 0.02,
"intermediate_size": 8192,
"max_position_embeddings": 8000,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 24,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 128004,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": {
"factor": 32.0,
"high_freq_factor": 4.0,
"low_freq_factor": 1.0,
"original_max_position_embeddings": 8192,
"rope_type": "llama3"
},
"rope_theta": 500000.0,
"tie_word_embeddings": true,
"torch_dtype": "float32",
"transformers_version": "4.51.3",
"unsloth_version": "2025.2.15",
"use_cache": true,
"vocab_size": 128256,
"fusion": {
"layers_merged": 28
},
"tensor_fusion": true,
"decode_functions": [
"decode_tokens",
"decode_parameters",
"decode_responses",
"decode_layers",
"decode_neurons",
"decode_tensors",
"decode_architecture",
"decode_fused_tensor"
],
"chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n---\n<|start_header_id|>system<|end_header_id|>\nCutting Knowledge Date: December 2025\nToday Date: {{ date_string }}\n---\n{{ system_message }}\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n# Mecanismos incluidos:\n- Fusi\u00f3n de todas las layers en 1\n- Fusi\u00f3n de todos los tensores en uno\n- Eliminaci\u00f3n de bias y censura\n- Configuraci\u00f3n de generaci\u00f3n: do_sample=True, temp=0.7, top_p=0.9, penalidad=1.2\n- Funciones de decodificaci\u00f3n completas\n---\n<|eot_id|>"
}