soughtlin committed on
Commit
bdab766
·
verified ·
1 Parent(s): bb8080b

Upload 8 files

Browse files
rnn/dot/best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e769ed3b93d68a94881fc269f9a8fbc40d10862b0039f6db3079a6d608caeab2
3
+ size 443012514
rnn/dot/config.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------- 分词器 -----------------
2
+ tokenizer: tokenizer.NltkTokenizer # 可改为自定义类的全路径
3
+
4
+ # ------------- 模型结构 (针对 100K 数据 & 8G 显存优化) ----------------
5
+ model:
6
+ type: "rnn"
7
+ encoder_embedding_dim: 256
8
+ decoder_embedding_dim: 256
9
+ hidden_dim: 512 # GRU 隐藏层维度
10
+ n_layers: 2 # 作业要求: 2 layers
11
+ dropout: 0.1
12
+ # 关键参数: 探索不同的 attention 方式
13
+ # 可选项: "dot" (默认), "general", "concat"
14
+ attention_method: "dot"
15
+
16
+ # ------------- 训练超参 ----------------
17
+ train:
18
+ batch_size: 32
19
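+ epochs: 10 # NOTE: rnn/dot/log.txt records 20 epochs for this run ("Epoch 1/20" ... "Total Epochs: 20"); confirm whether this value was overridden at launch or should read 20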
20
+ lr: 0.0005
21
+ weight_decay: 0.0001
22
+ clip: 1.0 # 梯度裁剪阈值
23
+ # 学习率调度参数
24
+ lr_patience: 2
25
+ lr_factor: 0.5
26
+ save_dir: runs/train/Nltk_100k
27
+ num_workers: 0
28
+
29
+ # ------------- 数据路径 ----------------
30
+ data:
31
+ raw_train: data/train_100k.jsonl # 输入文件不变
32
+ raw_val: data/valid.jsonl
33
+ raw_test: data/test.jsonl
34
+
35
+ processed_dir: data/processed_nltk_100k
36
+ train_processed: data/processed_nltk_100k/train.jsonl
37
+ val_processed: data/processed_nltk_100k/val.jsonl
38
+ test_processed: data/processed_nltk_100k/test.jsonl
39
+
40
+ src_vocab: data/processed_nltk_100k/src_vocab.pkl
41
+ tgt_vocab: data/processed_nltk_100k/tgt_vocab.pkl
42
+ min_freq: 2
43
+
44
+ # ------------- 其余 --------------------
45
+ seed: 3407 # 固定随机种子,保证可复现
rnn/dot/log.txt ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [Info] Logging started. Output will be saved to runs\train\Nltk_100k\rnn\20251227_2104\log.txt
2
+ [Info] Model Architecture: RNN
3
+ Seq2SeqRNN(
4
+ (encoder): EncoderRNN(
5
+ (embedding): Embedding(34122, 256)
6
+ (gru): GRU(256, 512, num_layers=2, batch_first=True, dropout=0.1)
7
+ (dropout): Dropout(p=0.1, inplace=False)
8
+ )
9
+ (decoder): DecoderRNN(
10
+ (embedding): Embedding(28647, 256)
11
+ (gru): GRU(256, 512, num_layers=2, batch_first=True, dropout=0.1)
12
+ (dropout): Dropout(p=0.1, inplace=False)
13
+ (attention): Attention(
14
+ (attn): Linear(in_features=512, out_features=512, bias=True)
15
+ )
16
+ (concat): Linear(in_features=1024, out_features=512, bias=True)
17
+ (out): Linear(in_features=512, out_features=28647, bias=True)
18
+ )
19
+ )
20
+ [Info] Total Parameters: 37,069,543
21
+ [Info] Trainable Parameters: 37,069,543
22
+ [Info] Model Size (approx): 141.41 MB
23
+ ===== Epoch 1/20 =====
24
+ [Info] Logging started. Output will be saved to runs\train\Nltk_100k\rnn\20251227_2104\log.txt
25
+ [Info] Model Architecture: RNN
26
+ Seq2SeqRNN(
27
+ (encoder): EncoderRNN(
28
+ (embedding): Embedding(34122, 256)
29
+ (gru): GRU(256, 512, num_layers=2, batch_first=True, dropout=0.1)
30
+ (dropout): Dropout(p=0.1, inplace=False)
31
+ )
32
+ (decoder): DecoderRNN(
33
+ (embedding): Embedding(28647, 256)
34
+ (gru): GRU(256, 512, num_layers=2, batch_first=True, dropout=0.1)
35
+ (dropout): Dropout(p=0.1, inplace=False)
36
+ (attention): Attention()
37
+ (concat): Linear(in_features=1024, out_features=512, bias=True)
38
+ (out): Linear(in_features=512, out_features=28647, bias=True)
39
+ )
40
+ )
41
+ [Info] Total Parameters: 36,806,887
42
+ [Info] Trainable Parameters: 36,806,887
43
+ [Info] Model Size (approx): 140.41 MB
44
+ ===== Epoch 1/20 =====
45
+ Epoch 01 | Time: 22m29s | train loss: 6.1225 | val loss: 6.1888
46
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 1
47
+ ===== Epoch 2/20 =====
48
+ Epoch 02 | Time: 19m42s | train loss: 5.4501 | val loss: 5.9591
49
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 2
50
+ ===== Epoch 3/20 =====
51
+ Epoch 03 | Time: 19m45s | train loss: 5.1753 | val loss: 5.8372
52
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 3
53
+ ===== Epoch 4/20 =====
54
+ Epoch 04 | Time: 19m45s | train loss: 4.9817 | val loss: 5.7523
55
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 4
56
+ ===== Epoch 5/20 =====
57
+ Epoch 05 | Time: 19m51s | train loss: 4.7887 | val loss: 5.6182
58
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 5
59
+ ===== Epoch 6/20 =====
60
+ Epoch 06 | Time: 19m46s | train loss: 4.6178 | val loss: 5.5535
61
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 6
62
+ ===== Epoch 7/20 =====
63
+ Epoch 07 | Time: 19m47s | train loss: 4.4872 | val loss: 5.5063
64
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 7
65
+ ===== Epoch 8/20 =====
66
+ Epoch 08 | Time: 19m43s | train loss: 4.3829 | val loss: 5.4677
67
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 8
68
+ ===== Epoch 9/20 =====
69
+ Epoch 09 | Time: 19m46s | train loss: 4.2980 | val loss: 5.4273
70
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 9
71
+ ===== Epoch 10/20 =====
72
+ Epoch 10 | Time: 19m46s | train loss: 4.2265 | val loss: 5.4023
73
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 10
74
+ ===== Epoch 11/20 =====
75
+ Epoch 11 | Time: 19m49s | train loss: 4.1654 | val loss: 5.3904
76
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 11
77
+ ===== Epoch 12/20 =====
78
+ Epoch 12 | Time: 19m45s | train loss: 4.1105 | val loss: 5.4086
79
+ ===== Epoch 13/20 =====
80
+ Epoch 13 | Time: 19m43s | train loss: 4.0633 | val loss: 5.3844
81
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 13
82
+ ===== Epoch 14/20 =====
83
+ Epoch 14 | Time: 19m45s | train loss: 4.0223 | val loss: 5.3724
84
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 14
85
+ ===== Epoch 15/20 =====
86
+ Epoch 15 | Time: 19m44s | train loss: 3.9840 | val loss: 5.3649
87
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 15
88
+ ===== Epoch 16/20 =====
89
+ Epoch 16 | Time: 19m45s | train loss: 3.9500 | val loss: 5.3559
90
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 16
91
+ ===== Epoch 17/20 =====
92
+ Epoch 17 | Time: 19m43s | train loss: 3.9202 | val loss: 5.3502
93
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 17
94
+ ===== Epoch 18/20 =====
95
+ Epoch 18 | Time: 19m48s | train loss: 3.8926 | val loss: 5.3545
96
+ ===== Epoch 19/20 =====
97
+ Epoch 19 | Time: 19m44s | train loss: 3.8676 | val loss: 5.3607
98
+ ===== Epoch 20/20 =====
99
+ Epoch 20 | Time: 19m47s | train loss: 3.7420 | val loss: 5.3310
100
+ New best model saved to runs\train\Nltk_100k\rnn\20251227_2104\best_model.pt, best epoch: 20
101
+ Model for epoch 20 saved to runs\train\Nltk_100k\rnn\20251227_2104\model_epoch_20.pt
102
+
103
+ Training finished! Total Epochs: 20 Total Time: 398m12s
rnn/dot/model_epoch_20.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69a74bee49ed6b8d6184ab954ca49a3864f68eaa698efd970640046e804dbc9e
3
+ size 443012826
rnn/general/best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cb4db4628635881ed1df9ca9d54f0104a339f881edcbb3e82f4293a14794e95
3
+ size 446166690
rnn/general/config.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------- 分词器 -----------------
2
+ tokenizer: tokenizer.NltkTokenizer # 可改为自定义类的全路径
3
+
4
+ # ------------- 模型结构 (针对 100K 数据 & 8G 显存优化) ----------------
5
+ model:
6
+ type: "rnn"
7
+ encoder_embedding_dim: 256
8
+ decoder_embedding_dim: 256
9
+ hidden_dim: 512 # GRU 隐藏层维度
10
+ n_layers: 2 # 作业要求: 2 layers
11
+ dropout: 0.1
12
+ # 关键参数: 探索不同的 attention 方式
13
+ # 可选项: "dot" (默认), "general", "concat"
14
+ attention_method: "general"
15
+
16
+ # ------------- 训练超参 ----------------
17
+ train:
18
+ batch_size: 32
19
+ epochs: 20
20
+ lr: 0.0005
21
+ weight_decay: 0.0001
22
+ clip: 1.0 # 梯度裁剪阈值
23
+ # 学习率调度参数
24
+ lr_patience: 2
25
+ lr_factor: 0.5
26
+ save_dir: runs/train/Nltk_100k
27
+ num_workers: 0
28
+
29
+ # ------------- 数据路径 ----------------
30
+ data:
31
+ raw_train: data/train_100k.jsonl # 输入文件不变
32
+ raw_val: data/valid.jsonl
33
+ raw_test: data/test.jsonl
34
+
35
+ processed_dir: data/processed_nltk_100k
36
+ train_processed: data/processed_nltk_100k/train.jsonl
37
+ val_processed: data/processed_nltk_100k/val.jsonl
38
+ test_processed: data/processed_nltk_100k/test.jsonl
39
+
40
+ src_vocab: data/processed_nltk_100k/src_vocab.pkl
41
+ tgt_vocab: data/processed_nltk_100k/tgt_vocab.pkl
42
+ min_freq: 2
43
+
44
+ # ------------- 其余 --------------------
45
+ seed: 3407 # 固定随机种子,保证可复现
rnn/general/log.txt ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [Info] Logging started. Output will be saved to runs\train\Nltk_100k\rnn\20251228_0557\log.txt
2
+ [Info] Model Architecture: RNN
3
+ Seq2SeqRNN(
4
+ (encoder): EncoderRNN(
5
+ (embedding): Embedding(34122, 256)
6
+ (gru): GRU(256, 512, num_layers=2, batch_first=True, dropout=0.1)
7
+ (dropout): Dropout(p=0.1, inplace=False)
8
+ )
9
+ (decoder): DecoderRNN(
10
+ (embedding): Embedding(28647, 256)
11
+ (gru): GRU(256, 512, num_layers=2, batch_first=True, dropout=0.1)
12
+ (dropout): Dropout(p=0.1, inplace=False)
13
+ (attention): Attention(
14
+ (attn): Linear(in_features=512, out_features=512, bias=True)
15
+ )
16
+ (concat): Linear(in_features=1024, out_features=512, bias=True)
17
+ (out): Linear(in_features=512, out_features=28647, bias=True)
18
+ )
19
+ )
20
+ [Info] Total Parameters: 37,069,543
21
+ [Info] Trainable Parameters: 37,069,543
22
+ [Info] Model Size (approx): 141.41 MB
23
+ ===== Epoch 1/20 =====
24
+ Epoch 01 | Time: 21m34s | train loss: 6.1383 | val loss: 6.1445
25
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 1
26
+ ===== Epoch 2/20 =====
27
+ Epoch 02 | Time: 22m49s | train loss: 5.3770 | val loss: 5.9185
28
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 2
29
+ ===== Epoch 3/20 =====
30
+ Epoch 03 | Time: 21m51s | train loss: 5.0793 | val loss: 5.7854
31
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 3
32
+ ===== Epoch 4/20 =====
33
+ Epoch 04 | Time: 21m30s | train loss: 4.8973 | val loss: 5.6876
34
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 4
35
+ ===== Epoch 5/20 =====
36
+ Epoch 05 | Time: 21m36s | train loss: 4.7746 | val loss: 5.6771
37
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 5
38
+ ===== Epoch 6/20 =====
39
+ Epoch 06 | Time: 21m45s | train loss: 4.6856 | val loss: 5.6152
40
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 6
41
+ ===== Epoch 7/20 =====
42
+ Epoch 07 | Time: 21m45s | train loss: 4.6145 | val loss: 5.5739
43
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 7
44
+ ===== Epoch 8/20 =====
45
+ Epoch 08 | Time: 21m43s | train loss: 4.5574 | val loss: 5.5883
46
+ ===== Epoch 9/20 =====
47
+ Epoch 09 | Time: 21m46s | train loss: 4.5090 | val loss: 5.5381
48
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 9
49
+ ===== Epoch 10/20 =====
50
+ Epoch 10 | Time: 21m45s | train loss: 4.4682 | val loss: 5.5439
51
+ ===== Epoch 11/20 =====
52
+ Epoch 11 | Time: 21m35s | train loss: 4.4376 | val loss: 5.5325
53
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 11
54
+ ===== Epoch 12/20 =====
55
+ Epoch 12 | Time: 21m44s | train loss: 4.4089 | val loss: 5.5030
56
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 12
57
+ ===== Epoch 13/20 =====
58
+ Epoch 13 | Time: 21m48s | train loss: 4.3860 | val loss: 5.5052
59
+ ===== Epoch 14/20 =====
60
+ Epoch 14 | Time: 21m42s | train loss: 4.3655 | val loss: 5.4956
61
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 14
62
+ ===== Epoch 15/20 =====
63
+ Epoch 15 | Time: 21m59s | train loss: 4.3467 | val loss: 5.4969
64
+ ===== Epoch 16/20 =====
65
+ Epoch 16 | Time: 21m51s | train loss: 4.3303 | val loss: 5.4816
66
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 16
67
+ ===== Epoch 17/20 =====
68
+ Epoch 17 | Time: 21m38s | train loss: 4.3171 | val loss: 5.4770
69
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 17
70
+ ===== Epoch 18/20 =====
71
+ Epoch 18 | Time: 21m43s | train loss: 4.3050 | val loss: 5.4545
72
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 18
73
+ ===== Epoch 19/20 =====
74
+ Epoch 19 | Time: 21m40s | train loss: 4.2943 | val loss: 5.4553
75
+ ===== Epoch 20/20 =====
76
+ Epoch 20 | Time: 21m41s | train loss: 4.2881 | val loss: 5.4534
77
+ New best model saved to runs\train\Nltk_100k\rnn\20251228_0557\best_model.pt, best epoch: 20
78
+ Model for epoch 20 saved to runs\train\Nltk_100k\rnn\20251228_0557\model_epoch_20.pt
79
+
80
+ Training finished! Total Epochs: 20 Total Time: 435m49s
rnn/general/model_epoch_20.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d60b7a5f0764a05f7b127741a175beabd41bebc303d5a19b4338a24eaa2cf2
3
+ size 446167034