{ "experiment_name": "dmhy-char-virtual-sps32-10epoch-lightfocus", "data_file": "data/generated/focus_after_virtual_sps32_char.jsonl", "data_sources": [ { "role": "primary", "path": "data/generated/focus_after_virtual_sps32_char.jsonl", "samples": 140660, "repeat": 1, "effective_samples": 140660 } ], "augmentation": { "partial_requested": 0, "partial_written": 0, "permutation_requested": 0, "permutation_written": 0, "special_requested": 0, "special_written": 0, "max_chars": 160 }, "dataset_mode": "encoded", "virtual_dataset_dir": null, "apply_label_repairs": false, "keep_raw_dataset": false, "tokenizer_variant": "char", "vocab_file": "datasets/AnimeName/vocab.char.json", "vocab_size": 6199, "max_seq_length": 128, "hidden_size": 256, "num_hidden_layers": 4, "num_attention_heads": 8, "intermediate_size": 1024, "train_samples": 133627, "eval_samples": 7033, "load_seconds": 3.860345099994447, "encode_seconds": 11.22450440004468, "epochs": 1.0, "max_steps": -1, "batch_size": 1792, "learning_rate": 2e-06, "warmup_steps": 20, "seed": 208, "device": "cuda", "fp16": false, "gradient_accumulation_steps": 1, "dataloader_num_workers": 0, "dataloader_prefetch_factor": null, "dataloader_persistent_workers": false, "dataloader_pin_memory": true, "encoded_dataset_device": "cpu", "mixed_precision": "bf16", "tf32": true, "torch_compile": false, "auto_find_batch_size": false, "perf_log_steps": 50, "perf_sample_interval": 0.5, "periodic_eval": false }