vedang1308 committed on
Commit
11c860f
·
verified ·
1 Parent(s): fb6329e

Epoch 1 | Files 1 | Loss 5.8903

Browse files
config.json CHANGED
@@ -29,7 +29,7 @@
29
  },
30
  "rope_theta": 500000.0,
31
  "tie_word_embeddings": true,
32
- "transformers_version": "4.57.1",
33
  "use_cache": true,
34
  "vocab_size": 128263
35
  }
 
29
  },
30
  "rope_theta": 500000.0,
31
  "tie_word_embeddings": true,
32
+ "transformers_version": "4.57.3",
33
  "use_cache": true,
34
  "vocab_size": 128263
35
  }
generation_config.json CHANGED
@@ -5,5 +5,5 @@
5
  "eos_token_id": 128001,
6
  "temperature": 0.6,
7
  "top_p": 0.9,
8
- "transformers_version": "4.57.1"
9
  }
 
5
  "eos_token_id": 128001,
6
  "temperature": 0.6,
7
  "top_p": 0.9,
8
+ "transformers_version": "4.57.3"
9
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2cafd5ae6a487404d64fb6582161bb6d7b226008d8b0397bb0b002f854d9479
3
  size 4965842104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f39e991bc53c44b804e11c5f1e3e78581ac8e33c0850d3aa875f1e1e79bb9c3
3
  size 4965842104
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06f762b7274769a7fa66579ebf1508962e3963066edb7a6cd476f7b00d2ee35b
3
  size 1459729952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abb2bd284e95c478cdac7468196656367a6793572ce14e297ca3d4f5d1510fc1
3
  size 1459729952
training_metadata.json CHANGED
@@ -1,11 +1,12 @@
1
  {
2
  "epoch": 1,
3
- "files_processed": 64,
4
- "total_files": 147,
5
- "last_file_index": 63,
6
- "avg_loss": 4.358400179250431,
7
- "learning_rate": 5e-06,
8
  "model_name": "meta-llama/Llama-3.2-3B",
 
9
  "processed_files": [],
10
- "timestamp": "2025-11-28T22:47:58.299442"
11
  }
 
1
  {
2
  "epoch": 1,
3
+ "files_processed": 1,
4
+ "total_files": 197,
5
+ "last_file_index": 0,
6
+ "avg_loss": 5.890276033549327,
7
+ "learning_rate": 2e-06,
8
  "model_name": "meta-llama/Llama-3.2-3B",
9
+ "training_run_id": "v2_gentle_retrain",
10
  "processed_files": [],
11
+ "timestamp": "2025-12-19T17:12:51.453734"
12
  }