Rick-AdaptKey committed on
Commit
b415be3
·
verified ·
1 Parent(s): 7b5059a

Upload configs/telecom-1.35M-v2.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. configs/telecom-1.35M-v2.yaml +50 -0
configs/telecom-1.35M-v2.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # Telecom 1.35M-v2 Fine-tuning Config
3
+ # Date: 2025-01-15
4
+ # Dataset: ~1.35M telecom examples (augmented network slicing & NF config)
5
+ # GPUs: 4x H100 NVL (devices 0-3)
6
+ # Strategy: Conservative 1 epoch (10500 iters x 128 global batch = ~1.34M samples) to avoid catastrophic forgetting
7
+ # Base: telecom-1.27M settings, adjusted iterations for larger dataset
8
+ # =============================================================================
9
+
10
+ train:
11
+ train_iters: 10500
12
+ global_batch_size: 128 # total samples per iteration across all 4 GPUs
13
+ micro_batch_size: 4
14
+
15
+ model:
16
+ expert_model_parallel_size: 4 # MUST match GPU count
17
+ moe_token_dispatcher_type: alltoall
18
+
19
+ optimizer:
20
+ lr: 5e-5 # Conservative to prevent forgetting
21
+
22
+ scheduler:
23
+ lr_warmup_iters: 525 # 5% of train_iters
24
+ lr_decay_iters: 10500 # Match train_iters
25
+
26
+ logger:
27
+ log_interval: 10
28
+
29
+ checkpoint:
30
+ save_interval: 1000
31
+ pretrained_checkpoint: /models/nemotron-30b-megatron
32
+ save: /models/telecom-1.35M-v2-lora
33
+ # load: /models/telecom-1.35M-v2-lora # Uncomment to resume
34
+
35
+ dataset:
36
+ dataset_name: json
37
+ dataset_root: /models/telecom-1.35M-v2
38
+ hf_kwargs:
39
+ data_files:
40
+ train: /models/telecom-1.35M-v2/train.jsonl
41
+ validation: /models/telecom-1.35M-v2/validation.jsonl
42
+ test: /models/telecom-1.35M-v2/test.jsonl
43
+ dataloader_type: batch
44
+ data_sharding: true
45
+ do_test: false
46
+ do_validation: true
47
+ num_workers: 4
48
+ pin_memory: true
49
+ seed: 42
50
+ seq_length: 2048