Yaongi
/

hybridko-exp7-phase1

Model card Files Files and versions

victor70 committed on Jan 15

Commit

f6260f0

·

verified ·

1 Parent(s): c8ce961

Add training config

Files changed (1) hide show

config.yaml +67 -0

config.yaml ADDED Viewed

	@@ -0,0 +1,67 @@

+# Exp7 Phase 1: Data Quality Check (117M-parameter model, 2B-token budget)
+# Validate data quality via loss curves
+model:
+  d_model: 768
+  n_layers: 12
+  vocab_size: 32000
+  n_heads: 12
+  n_kv_heads: 3
+  ff_mult: 3
+  max_seq_len: 1024
+training:
+  # Optimizer
+  weight_decay: 0.1
+  grad_clip: 1.0
+  # Learning Rate
+  peak_lr: 5.0e-4
+  min_lr: 5.0e-5
+  warmup_steps: 1000
+  # Regularization
+  dropout: 0.1
+  label_smoothing: 0.05
+  # Batch size - 8 GPUs * 8 per-GPU batch * 2 grad-accum steps = 128 sequences per optimizer step
+  batch_size: 8
+  grad_accum_steps: 2
+  max_length: 1024
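+  # Training - 1.29B tokens / (128 sequences * 1024 tokens per sequence) = ~9,840 steps, rounded up to 10000 (~1.31B tokens)
+  # NOTE(review): the file header and phase.total_tokens say 2B tokens - confirm which budget is intended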
+  max_steps: 10000
+  save_steps: 2000
+  eval_steps: 500
+  log_steps: 100
+  # Checkpointing
+  gradient_checkpointing: true
+data:
+  path: data/processed_exp7_phase1
+  # Mix ratios
+  korean_ratio: 0.50
+  english_ratio: 0.30
+  math_ratio: 0.15
+  code_ratio: 0.05
+tokenizer:
+  vocab_size: 32000
+  model_type: unigram
+  character_coverage: 0.9995
+distributed:
+  enabled: true
+  world_size: 8
+  backend: nccl
+# Phase 1 specific settings
+phase:
+  name: "data_quality_check"
+  total_tokens: "2B"
+  analysis:
+    - "loss_curve_by_domain"
+    - "perplexity_tracking"
+    - "data_mix_optimization"
+    - "benchmark_evaluation"