ysong21 committed on
Commit
13a26a2
·
verified ·
1 Parent(s): ee29072

Upload 4 files

Browse files
adapter_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": null,
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": false,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 64,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 32,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "up_proj",
28
+ "q_proj",
29
+ "gate_proj",
30
+ "o_proj",
31
+ "v_proj",
32
+ "k_proj",
33
+ "down_proj"
34
+ ],
35
+ "task_type": "CAUSAL_LM",
36
+ "trainable_token_indices": null,
37
+ "use_dora": false,
38
+ "use_rslora": false
39
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e0e196aa158d3a11ef4b0b023921fae6410a0eedf6d74a5b6ff10c5d4dff1d8
3
+ size 454197288
stats.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"world_size": 4, "epochs": 1, "steps": 94, "seqs": 999, "tokens": 2906680, "last_epoch_steps": 0, "last_epoch_seqs": 0, "last_epoch_tokens": 0, "total_seqs": 999, "nan_in_loss_seqs": 0, "experiment_tracking_run_id": null, "loss_ema": 1.6103686253345062, "loss_sum": 33.14478254318237, "mtp_loss_ema": 0, "mtp_loss_sum": 0, "distillation_loss_ema": 0, "distillation_loss_sum": 0, "hard_loss_ema": 0, "hard_loss_sum": 0, "eval_losses_avg": []}
train_config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "comet": false,
3
+ "comet_api_key": null,
4
+ "comet_workspace": null,
5
+ "comet_project": null,
6
+ "comet_run_id": "v6vlpnyh",
7
+ "wandb": true,
8
+ "wandb_entity": "maxsong-carnegie-mellon-university",
9
+ "wandb_project": "entropy",
10
+ "wandb_run_id": "v6vlpnyh",
11
+ "base_model_dir": "/llm-downloader-destination/base/fireworks/gemma-3-27b-it/hf",
12
+ "output_model_dir": "gs://fireworks-artifacts-maxx1999syp-bybv7vrv-254f13/tuned-model-v6vlpnyh/5a2aa8/gemma-3-27b-entropy-02082026/checkpoint",
13
+ "checkpoint_dir": "/dev/shm/checkpoints",
14
+ "gcs_checkpoint_dir": "gs://fireworks-artifacts-maxx1999syp-bybv7vrv-254f13/tuned-model-v6vlpnyh/5a2aa8/gemma-3-27b-entropy-02082026/checkpoints/checkpoints",
15
+ "max_checkpoints_to_keep": 1,
16
+ "checkpoint_interval": 3600,
17
+ "save_final_checkpoint": false,
18
+ "train": true,
19
+ "learning_rate": 0.0002,
20
+ "learning_rate_warmup_steps": 0,
21
+ "grad_accum_steps": 1,
22
+ "epochs": 1,
23
+ "early_stop": false,
24
+ "seed": 42,
25
+ "dataset_dir": "/mnt/staging/dataset",
26
+ "eval_auto_carveout": false,
27
+ "eval_dataset_dir": null,
28
+ "train_limit": null,
29
+ "max_context_len": 8192,
30
+ "batch_size": 32768,
31
+ "batch_size_samples": null,
32
+ "max_data_workers": 0,
33
+ "min_evals_per_epoch": 1,
34
+ "max_evals_per_epoch": 5,
35
+ "precision": null,
36
+ "status_file": "gs://fireworks-fine-tuning-job-status/sftj-maxx1999syp-bybv7vrv-v6vlpnyh-5d74d2ea-e4c7-4e3a-ae4d-deb107b98a9e",
37
+ "billing_file": "gs://fireworks-fine-tuning-metadata/sftj-maxx1999syp-bybv7vrv-v6vlpnyh/billing-5d74d2ea-e4c7-4e3a-ae4d-deb107b98a9e",
38
+ "metrics_file": "gs://fireworks-fine-tuning-metadata/sftj-maxx1999syp-bybv7vrv-v6vlpnyh/metrics.jsonl",
39
+ "trainer_logs_file": null,
40
+ "profile": null,
41
+ "weight_sharding": null,
42
+ "activation_sharding": null,
43
+ "empty_weights": false,
44
+ "nan_ratio_threshold": 0.05,
45
+ "fast_api_port": 80,
46
+ "optimizer": "adamw",
47
+ "optimizer_weight_decay": 0.01,
48
+ "target_shard_size_gb": null,
49
+ "enable_fast_processor": false,
50
+ "peft_addon_dir": null,
51
+ "lora_rank": 32,
52
+ "lora_dropout": 0.05,
53
+ "template_kind": "conversation",
54
+ "template": null,
55
+ "mtp_config": { "enable_mtp": false, "freeze_base_model": false, "num_draft_tokens": 1 },
56
+ "distillation_alpha": null,
57
+ "qat": true,
58
+ "kld": false,
59
+ "teft_tokens": [],
60
+ "skip_dataset_filtering": false
61
+ }