{
  "model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
  "output_dir": "./outputs/WorldDisasterLM-8B",
  "num_train_epochs": 3,
  "per_device_train_batch_size": 4,
  "per_device_eval_batch_size": 4,
  "gradient_accumulation_steps": 4,
  "gradient_checkpointing": true,
  "learning_rate": 0.0002,
  "lr_scheduler_type": "cosine",
  "warmup_ratio": 0.03,
  "weight_decay": 0.001,
  "max_grad_norm": 0.3,
  "optim": "paged_adamw_32bit",
  "fp16": false,
  "bf16": true,
  "max_seq_length": 4096,
  "packing": true,
  "lora_r": 16,
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "lora_target_modules": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj"
  ],
  "use_4bit": true,
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_compute_dtype": "bfloat16",
  "use_nested_quant": true,
  "save_steps": 100,
  "logging_steps": 25,
  "evaluation_strategy": "steps",
  "eval_steps": 100,
  "save_total_limit": 3,
  "load_best_model_at_end": true,
  "metric_for_best_model": "eval_loss",
  "dataloader_num_workers": 4,
  "seed": 42,
  "report_to": [
    "tensorboard"
  ],
  "dataset_sources": [
    "ReliefWeb",
    "USGS",
    "GDACS",
    "NOAA",
    "OpenFEMA",
    "WHO"
  ],
  "dataset_size": "88+ live records → 711+ instruction samples per run",
  "languages": [
    "en",
    "ne",
    "es",
    "fr",
    "ar",
    "hi",
    "te",
    "zh",
    "ja",
    "ko",
    "pt"
  ],
  "language_names": [
    "English",
    "Nepali (नेपाली)",
    "Spanish",
    "French",
    "Arabic",
    "Hindi",
    "Telugu",
    "Chinese",
    "Japanese",
    "Korean",
    "Portuguese"
  ],
  "training_status": "PENDING — weights not yet generated. Run: python train.py"
}