Ennon committed on
Commit
e1e4225
·
verified ·
1 Parent(s): 8048939

Phi-3.5 DevOps v2 - YAML indentation fix

Browse files
README.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - pl
4
+ - en
5
+ license: mit
6
+ tags:
7
+ - devops
8
+ - kubernetes
9
+ - ansible
10
+ - terraform
11
+ - yaml
12
+ base_model: microsoft/Phi-3.5-mini-instruct
13
+ ---
14
+
15
+ # Phi-3.5-mini-PL-DevOps-Instruct-v2
16
+
17
+ Polish DevOps assistant fine-tuned on Infrastructure as Code tasks.
18
+
19
+ ## ⚠️ Fixes in v2
20
+ - **Fixed YAML indentation** - consistent 2-space indentation
21
+ - **High-Quality Training** - Native BF16 training (no quantization errors)
22
+ - Trained WITHOUT Unsloth (no padding-free mode)
23
+ - `packing=False` to preserve whitespace
24
+
25
+ ## Evaluation / Inference
26
+ This model is saved in **BFLOAT16**.
27
+ - For 4-bit inference: Load with `load_in_4bit=True` (bitsandbytes)
28
+ - For vLLM: Compatible with standard loading or FP8/AWQ quantization
29
+
30
+ ## Training
31
+ | Param | Value |
32
+ |-------|-------|
33
+ | Base | microsoft/Phi-3.5-mini-instruct |
34
+ | Method | LoRA fine-tuning (r=16, alpha=32) on unquantized BF16 weights |
35
+ | Batch | 96 effective |
36
+ | Train samples | 172,145 |
37
+ | Train loss | 0.5981 |
38
+ | Time | 147.3 min |
39
+ | GPU | H100 80GB |
chat_template.jinja ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>
2
+ ' + message['content'] + '<|end|>
3
+ '}}{% elif message['role'] == 'user' %}{{'<|user|>
4
+ ' + message['content'] + '<|end|>
5
+ '}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>
6
+ ' + message['content'] + '<|end|>
7
+ '}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>
8
+ ' }}{% else %}{{ eos_token }}{% endif %}
config.json ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Phi3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_phi3.Phi3Config",
9
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
10
+ },
11
+ "bos_token_id": 1,
12
+ "dtype": "bfloat16",
13
+ "embd_pdrop": 0.0,
14
+ "eos_token_id": 32000,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 3072,
17
+ "ignore_keys_at_rope_validation": null,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 8192,
20
+ "max_position_embeddings": 131072,
21
+ "model_type": "phi3",
22
+ "num_attention_heads": 32,
23
+ "num_hidden_layers": 32,
24
+ "num_key_value_heads": 32,
25
+ "original_max_position_embeddings": 4096,
26
+ "pad_token_id": 32009,
27
+ "partial_rotary_factor": 1.0,
28
+ "resid_pdrop": 0.0,
29
+ "rms_norm_eps": 1e-05,
30
+ "rope_parameters": {
31
+ "long_factor": [
32
+ 1.0800000429153442,
33
+ 1.1100000143051147,
34
+ 1.1399999856948853,
35
+ 1.340000033378601,
36
+ 1.5899999141693115,
37
+ 1.600000023841858,
38
+ 1.6200000047683716,
39
+ 2.620000123977661,
40
+ 3.2300000190734863,
41
+ 3.2300000190734863,
42
+ 4.789999961853027,
43
+ 7.400000095367432,
44
+ 7.700000286102295,
45
+ 9.09000015258789,
46
+ 12.199999809265137,
47
+ 17.670000076293945,
48
+ 24.46000099182129,
49
+ 28.57000160217285,
50
+ 30.420001983642578,
51
+ 30.840002059936523,
52
+ 32.590003967285156,
53
+ 32.93000411987305,
54
+ 42.320003509521484,
55
+ 44.96000289916992,
56
+ 50.340003967285156,
57
+ 50.45000457763672,
58
+ 57.55000305175781,
59
+ 57.93000411987305,
60
+ 58.21000289916992,
61
+ 60.1400032043457,
62
+ 62.61000442504883,
63
+ 62.62000274658203,
64
+ 62.71000289916992,
65
+ 63.1400032043457,
66
+ 63.1400032043457,
67
+ 63.77000427246094,
68
+ 63.93000411987305,
69
+ 63.96000289916992,
70
+ 63.970001220703125,
71
+ 64.02999877929688,
72
+ 64.06999969482422,
73
+ 64.08000183105469,
74
+ 64.12000274658203,
75
+ 64.41000366210938,
76
+ 64.4800033569336,
77
+ 64.51000213623047,
78
+ 64.52999877929688,
79
+ 64.83999633789062
80
+ ],
81
+ "original_max_position_embeddings": 4096,
82
+ "partial_rotary_factor": 1.0,
83
+ "rope_theta": 10000.0,
84
+ "rope_type": "longrope",
85
+ "short_factor": [
86
+ 1.0,
87
+ 1.0199999809265137,
88
+ 1.0299999713897705,
89
+ 1.0299999713897705,
90
+ 1.0499999523162842,
91
+ 1.0499999523162842,
92
+ 1.0499999523162842,
93
+ 1.0499999523162842,
94
+ 1.0499999523162842,
95
+ 1.0699999332427979,
96
+ 1.0999999046325684,
97
+ 1.1099998950958252,
98
+ 1.1599998474121094,
99
+ 1.1599998474121094,
100
+ 1.1699998378753662,
101
+ 1.2899998426437378,
102
+ 1.339999794960022,
103
+ 1.679999828338623,
104
+ 1.7899998426437378,
105
+ 1.8199998140335083,
106
+ 1.8499997854232788,
107
+ 1.8799997568130493,
108
+ 1.9099997282028198,
109
+ 1.9399996995925903,
110
+ 1.9899996519088745,
111
+ 2.0199997425079346,
112
+ 2.0199997425079346,
113
+ 2.0199997425079346,
114
+ 2.0199997425079346,
115
+ 2.0199997425079346,
116
+ 2.0199997425079346,
117
+ 2.0299997329711914,
118
+ 2.0299997329711914,
119
+ 2.0299997329711914,
120
+ 2.0299997329711914,
121
+ 2.0299997329711914,
122
+ 2.0299997329711914,
123
+ 2.0299997329711914,
124
+ 2.0299997329711914,
125
+ 2.0299997329711914,
126
+ 2.0799996852874756,
127
+ 2.0899996757507324,
128
+ 2.189999580383301,
129
+ 2.2199995517730713,
130
+ 2.5899994373321533,
131
+ 2.729999542236328,
132
+ 2.749999523162842,
133
+ 2.8399994373321533
134
+ ],
135
+ "type": "longrope"
136
+ },
137
+ "sliding_window": 262144,
138
+ "tie_word_embeddings": false,
139
+ "transformers_version": "5.0.0",
140
+ "use_cache": false,
141
+ "vocab_size": 32064
142
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 32007,
6
+ 32001,
7
+ 32000
8
+ ],
9
+ "pad_token_id": 32009,
10
+ "transformers_version": "5.0.0"
11
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d515ff81e9addf6882869208feee3bc418357ea1b6db27e7f07e869ebdbb0e7
3
+ size 7642181896
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<s>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|endoftext|>",
6
+ "is_local": false,
7
+ "legacy": false,
8
+ "model_max_length": 4096,
9
+ "pad_token": "<|placeholder6|>",
10
+ "padding_side": "right",
11
+ "sp_model_kwargs": {},
12
+ "tokenizer_class": "TokenizersBackend",
13
+ "unk_token": "<unk>",
14
+ "use_default_system_prompt": false
15
+ }
trainer_log_history.json ADDED
The diff for this file is too large to render. See raw diff
 
training_metadata.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "microsoft/Phi-3.5-mini-instruct",
3
+ "display_name": "Phi-3.5 (BF16, Liger, Batch96, CkptON)",
4
+ "timestamp": "2026-02-03T19:35:19.489805",
5
+ "training_config": {
6
+ "num_train_epochs": 1,
7
+ "per_device_train_batch_size": 96,
8
+ "gradient_accumulation_steps": 1,
9
+ "learning_rate": 0.0002,
10
+ "warmup_ratio": 0.03,
11
+ "lr_scheduler_type": "cosine",
12
+ "weight_decay": 0.01,
13
+ "max_seq_length": 4096,
14
+ "logging_steps": 1,
15
+ "eval_steps": 50,
16
+ "save_steps": 200,
17
+ "seed": 42,
18
+ "bf16": true,
19
+ "optim": "adamw_torch_fused",
20
+ "dataloader_num_workers": 8,
21
+ "torch_compile": true
22
+ },
23
+ "lora_config": {
24
+ "r": 16,
25
+ "lora_alpha": 32,
26
+ "lora_dropout": 0.05,
27
+ "target_modules": [
28
+ "q_proj",
29
+ "k_proj",
30
+ "v_proj",
31
+ "o_proj",
32
+ "gate_proj",
33
+ "up_proj",
34
+ "down_proj"
35
+ ],
36
+ "bias": "none",
37
+ "task_type": "CAUSAL_LM"
38
+ },
39
+ "train_loss": 0.5981301681586066,
40
+ "train_samples": 172145,
41
+ "val_samples": 9066,
42
+ "train_time_minutes": 147.3416652202606,
43
+ "max_memory_gb": 54.91591787338257,
44
+ "fix_applied": "YAML normalization via PyYAML (2 spaces), packing=False, Native BF16 Training, Batch 96"
45
+ }