Phi-3.5 DevOps v2 - YAML indentation fix

Browse files

Files changed (9) hide show

README.md +39 -0
chat_template.jinja +8 -0
config.json +142 -0
generation_config.json +11 -0
model.safetensors +3 -0
tokenizer.json +0 -0
tokenizer_config.json +15 -0
trainer_log_history.json +0 -0
training_metadata.json +45 -0

README.md ADDED Viewed

	@@ -0,0 +1,39 @@

+---
+language:
+- pl
+- en
+license: mit
+tags:
+- devops
+- kubernetes
+- ansible
+- terraform
+- yaml
+base_model: microsoft/Phi-3.5-mini-instruct
+---
+# Phi-3.5-mini-PL-DevOps-Instruct-v2
+Polish DevOps assistant fine-tuned on Infrastructure as Code tasks.
+## ⚠️ Fixes in v2
+- **Fixed YAML indentation** - consistent 2-space indentation
+- **High Quality Training** - Native BF16 training (no quantization errors)
+- Trained WITHOUT Unsloth (no padding-free mode)
+- `packing=False` to preserve whitespace
+## Evaluation / Inference
+This model is saved in **BFLOAT16**.
+- For 4-bit inference: pass `quantization_config=BitsAndBytesConfig(load_in_4bit=True)` to `from_pretrained` (requires bitsandbytes)
+- For vLLM: loads as-is in BF16; FP8 can be applied at load time (`quantization="fp8"`), while AWQ requires quantizing the checkpoint first
+## Training
+| Param | Value |
+|-------|-------|
+| Base | microsoft/Phi-3.5-mini-instruct |
+| Method | LoRA (r=16, alpha=32) on unquantized BF16 weights |
+| Batch | 96 effective |
+| Train samples | 172,145 |
+| Train loss | 0.5981 |
+| Time | 147.3 min |
+| GPU | H100 80GB |

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,8 @@

+{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'user' %}{{'<|user|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>
+' + message['content'] + '<|end|>
+'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>
+' }}{% else %}{{ eos_token }}{% endif %}

config.json ADDED Viewed

	@@ -0,0 +1,142 @@

+{
+  "architectures": [
+    "Phi3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration_phi3.Phi3Config",
+    "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
+  },
+  "bos_token_id": 1,
+  "dtype": "bfloat16",
+  "embd_pdrop": 0.0,
+  "eos_token_id": 32000,
+  "hidden_act": "silu",
+  "hidden_size": 3072,
+  "ignore_keys_at_rope_validation": null,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "model_type": "phi3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "original_max_position_embeddings": 4096,
+  "pad_token_id": 32009,
+  "partial_rotary_factor": 1.0,
+  "resid_pdrop": 0.0,
+  "rms_norm_eps": 1e-05,
+  "rope_parameters": {
+    "long_factor": [
+      1.0800000429153442,
+      1.1100000143051147,
+      1.1399999856948853,
+      1.340000033378601,
+      1.5899999141693115,
+      1.600000023841858,
+      1.6200000047683716,
+      2.620000123977661,
+      3.2300000190734863,
+      3.2300000190734863,
+      4.789999961853027,
+      7.400000095367432,
+      7.700000286102295,
+      9.09000015258789,
+      12.199999809265137,
+      17.670000076293945,
+      24.46000099182129,
+      28.57000160217285,
+      30.420001983642578,
+      30.840002059936523,
+      32.590003967285156,
+      32.93000411987305,
+      42.320003509521484,
+      44.96000289916992,
+      50.340003967285156,
+      50.45000457763672,
+      57.55000305175781,
+      57.93000411987305,
+      58.21000289916992,
+      60.1400032043457,
+      62.61000442504883,
+      62.62000274658203,
+      62.71000289916992,
+      63.1400032043457,
+      63.1400032043457,
+      63.77000427246094,
+      63.93000411987305,
+      63.96000289916992,
+      63.970001220703125,
+      64.02999877929688,
+      64.06999969482422,
+      64.08000183105469,
+      64.12000274658203,
+      64.41000366210938,
+      64.4800033569336,
+      64.51000213623047,
+      64.52999877929688,
+      64.83999633789062
+    ],
+    "original_max_position_embeddings": 4096,
+    "partial_rotary_factor": 1.0,
+    "rope_theta": 10000.0,
+    "rope_type": "longrope",
+    "short_factor": [
+      1.0,
+      1.0199999809265137,
+      1.0299999713897705,
+      1.0299999713897705,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0699999332427979,
+      1.0999999046325684,
+      1.1099998950958252,
+      1.1599998474121094,
+      1.1599998474121094,
+      1.1699998378753662,
+      1.2899998426437378,
+      1.339999794960022,
+      1.679999828338623,
+      1.7899998426437378,
+      1.8199998140335083,
+      1.8499997854232788,
+      1.8799997568130493,
+      1.9099997282028198,
+      1.9399996995925903,
+      1.9899996519088745,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0799996852874756,
+      2.0899996757507324,
+      2.189999580383301,
+      2.2199995517730713,
+      2.5899994373321533,
+      2.729999542236328,
+      2.749999523162842,
+      2.8399994373321533
+    ],
+    "type": "longrope"
+  },
+  "sliding_window": 262144,
+  "tie_word_embeddings": false,
+  "transformers_version": "5.0.0",
+  "use_cache": false,
+  "vocab_size": 32064
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": [
+    32007,
+    32001,
+    32000
+  ],
+  "pad_token_id": 32009,
+  "transformers_version": "5.0.0"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d515ff81e9addf6882869208feee3bc418357ea1b6db27e7f07e869ebdbb0e7
+size 7642181896

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "backend": "tokenizers",
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "is_local": false,
+  "legacy": false,
+  "model_max_length": 4096,
+  "pad_token": "<|placeholder6|>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

trainer_log_history.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_metadata.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "model_name": "microsoft/Phi-3.5-mini-instruct",
+  "display_name": "Phi-3.5 (BF16, Liger, Batch96, CkptON)",
+  "timestamp": "2026-02-03T19:35:19.489805",
+  "training_config": {
+    "num_train_epochs": 1,
+    "per_device_train_batch_size": 96,
+    "gradient_accumulation_steps": 1,
+    "learning_rate": 0.0002,
+    "warmup_ratio": 0.03,
+    "lr_scheduler_type": "cosine",
+    "weight_decay": 0.01,
+    "max_seq_length": 4096,
+    "logging_steps": 1,
+    "eval_steps": 50,
+    "save_steps": 200,
+    "seed": 42,
+    "bf16": true,
+    "optim": "adamw_torch_fused",
+    "dataloader_num_workers": 8,
+    "torch_compile": true
+  },
+  "lora_config": {
+    "r": 16,
+    "lora_alpha": 32,
+    "lora_dropout": 0.05,
+    "target_modules": [
+      "q_proj",
+      "k_proj",
+      "v_proj",
+      "o_proj",
+      "gate_proj",
+      "up_proj",
+      "down_proj"
+    ],
+    "bias": "none",
+    "task_type": "CAUSAL_LM"
+  },
+  "train_loss": 0.5981301681586066,
+  "train_samples": 172145,
+  "val_samples": 9066,
+  "train_time_minutes": 147.3416652202606,
+  "max_memory_gb": 54.91591787338257,
+  "fix_applied": "YAML normalization via PyYAML (2 spaces), packing=False, Native BF16 Training, Batch 96"
+}