Delta-Vector committed on
Commit
eaf8bd4
·
verified ·
1 Parent(s): 640ee0a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +98 -1
README.md CHANGED
@@ -3,6 +3,11 @@ datasets:
3
  - NewEden/Orion-LIT
4
  - NewEden/Orion-Asstr-Stories-16K
5
  - Mielikki/Erebus-87k
 
 
 
 
 
6
  base_model:
7
  - Delta-Vector/Hamanasu-15B-Instruct
8
  tags:
@@ -231,7 +236,99 @@ Take off your helmet.<|im_end|>
231
  ## Axolotl Config ꒰(˶• ᴗ •˶)꒱
232
 
233
  <details>
234
- ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  </details>
236
  </div>
237
 
 
3
  - NewEden/Orion-LIT
4
  - NewEden/Orion-Asstr-Stories-16K
5
  - Mielikki/Erebus-87k
6
+ - PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
7
+ - Nitral-AI/ARES-ShareGPT
8
+ - Gryphe/Sonnet3.5-SlimOrcaDedupCleaned-20k
9
+ - NewEden/Claude-Instruct-2.7K
10
+ - NewEden/Claude-Instruct-5K
11
  base_model:
12
  - Delta-Vector/Hamanasu-15B-Instruct
13
  tags:
 
236
  ## Axolotl Config ꒰(˶• ᴗ •˶)꒱
237
 
238
  <details>
239
+ base_model: NewEden_Phi-PT-merged-LIT
240
+ model_type: AutoModelForCausalLM
241
+ tokenizer_type: AutoTokenizer
242
+
243
+ plugins:
244
+ - axolotl.integrations.liger.LigerPlugin
245
+ liger_rope: true
246
+ liger_rms_norm: true
247
+ liger_swiglu: true
248
+ liger_fused_linear_cross_entropy: true
249
+
250
+
251
+ load_in_8bit: false
252
+ load_in_4bit: false
253
+ strict: false
254
+
255
+ datasets:
256
+ - path: PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
257
+ type: sharegpt
258
+ - path: Nitral-AI/ARES-ShareGPT
259
+ type: sharegpt
260
+ - path: Gryphe/Sonnet3.5-SlimOrcaDedupCleaned-20k
261
+ type: sharegpt
262
+ - path: NewEden/Claude-Instruct-2.7K
263
+ type: sharegpt
264
+ - path: NewEden/Claude-Instruct-5K
265
+ type: sharegpt
266
+
267
+ shuffle_merged_datasets: true
268
+ dataset_prepared_path: prepared_data
269
+ val_set_size: 0.0
270
+ output_dir: ./phi4-inst-out-r2
271
+
272
+ sequence_len: 16384
273
+ sample_packing: true
274
+ pad_to_sequence_len: true
275
+
276
+ adapter: lora
277
+ lora_model_dir:
278
+ lora_r: 128
279
+ lora_alpha: 16
280
+ lora_dropout: 0.05
281
+ lora_target_modules:
282
+ - gate_proj
283
+ - down_proj
284
+ - up_proj
285
+ - q_proj
286
+ - v_proj
287
+ - k_proj
288
+ - o_proj
289
+
290
+ lora_modules_to_save:
291
+ - embed_tokens
292
+ - lm_head
293
+
294
+
295
+ wandb_project: mag-phi
296
+ wandb_entity:
297
+ wandb_watch:
298
+ wandb_name: inst-attempt-02
299
+ wandb_log_model:
300
+
301
+ gradient_accumulation_steps: 4
302
+ micro_batch_size: 2
303
+ num_epochs: 4
304
+ optimizer: paged_ademamix_8bit
305
+ lr_scheduler: cosine
306
+ learning_rate: 0.000025
307
+
308
+ train_on_inputs: false
309
+ group_by_length: false
310
+ bf16: auto
311
+ fp16:
312
+ tf32: false
313
+
314
+ gradient_checkpointing: unsloth
315
+ early_stopping_patience:
316
+ resume_from_checkpoint:
317
+ local_rank:
318
+ logging_steps: 1
319
+ xformers_attention:
320
+ flash_attention: true
321
+
322
+ warmup_steps: 15
323
+ evals_per_epoch: 4
324
+ eval_table_size:
325
+ eval_max_new_tokens: 128
326
+ saves_per_epoch: 2
327
+ debug:
328
+ deepspeed: /workspace/axolotl/deepspeed_configs/zero3_bf16_cpuoffload_params.json
329
+ weight_decay: 0.01
330
+ fsdp:
331
+ fsdp_config:
332
  </details>
333
  </div>
334