clarenceleo committed on
Commit
58d2582
·
verified ·
1 Parent(s): e012066

Upload 4 files

Browse files
models/best_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:800bf47a5776df6cdcdd45d9a555ba8e0ac7a416467dd0a257871accfe1c0b3a
3
+ size 6765301
models/final_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589f9b776ca64fb3e2725786b520bd26cf7b207d02d05d5148e8fd2ea2565d49
3
+ size 6765317
models/king_ai.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models/king_ai.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ import sys
6
+ import os
7
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+ from config import NUM_ACTIONS, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS
9
+
10
class KingAI(nn.Module):
    """Behavior-cloning policy network for Honor of Kings gameplay.

    Input:  (batch, IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH) game frames
            (84x84 RGB by default — per the config constants).
    Output: (batch, num_actions) raw action logits; apply softmax for
            probabilities.
    """

    def __init__(self, num_actions=NUM_ACTIONS):
        super().__init__()

        # Convolutional feature extractor (classic DQN-style stack).
        self.conv1 = nn.Conv2d(IMG_CHANNELS, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)

        # Infer the flattened conv output size with a dummy pass so the
        # FC head adapts automatically to IMG_HEIGHT/IMG_WIDTH changes.
        self._calculate_fc_dim()

        # Fully-connected head producing per-action logits.
        self.fc1 = nn.Linear(self.fc_input_dim, 512)
        self.fc2 = nn.Linear(512, num_actions)

        self._initialize_weights()

    def _conv_features(self, x):
        """Run the conv stack and return flattened (batch, features).

        Shared by `forward` and `_calculate_fc_dim` so the two can never
        drift out of sync.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return x.view(x.size(0), -1)

    def _calculate_fc_dim(self):
        """Compute the conv-stack output dimension for the FC input."""
        with torch.no_grad():
            dummy = torch.zeros(1, IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH)
            self.fc_input_dim = self._conv_features(dummy).shape[1]

    def _initialize_weights(self):
        """Kaiming-normal init for conv/linear weights; zero biases."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Map a batch of frames to action logits."""
        x = self._conv_features(x)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
58
+
59
+
60
class ActorCritic(nn.Module):
    """Actor-critic network for reinforcement learning.

    Shares one conv + FC feature trunk; two heads output action logits
    (actor) and a scalar state-value estimate (critic).
    """

    def __init__(self, num_actions=NUM_ACTIONS):
        super().__init__()

        # Shared convolutional trunk (same layout as KingAI).
        self.conv1 = nn.Conv2d(IMG_CHANNELS, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, 3, stride=1)

        # Infer the flattened conv output size with a dummy forward pass.
        with torch.no_grad():
            dummy = torch.zeros(1, IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH)
            x = F.relu(self.conv1(dummy))
            x = F.relu(self.conv2(x))
            x = F.relu(self.conv3(x))
            fc_dim = x.view(1, -1).shape[1]

        self.fc_shared = nn.Linear(fc_dim, 512)
        self.actor = nn.Linear(512, num_actions)  # action-logits head
        self.critic = nn.Linear(512, 1)           # state-value head

        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal weights and zero biases.

        Bias zeroing was missing here while KingAI zeroes its biases —
        added for consistent initialization across both models.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return (action_logits, value) for a batch of frames."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc_shared(x))

        action_logits = self.actor(x)
        value = self.critic(x)

        return action_logits, value
103
+
104
+
105
def test_model():
    """Smoke-test both model classes on one random batch and print shapes."""
    policy = KingAI()
    batch = torch.randn(4, 3, 84, 84)
    logits = policy(batch)
    print(f"KingAI - 输入: {batch.shape}, 输出: {logits.shape}")
    print(f"参数量: {sum(p.numel() for p in policy.parameters()):,}")

    ac = ActorCritic()
    ac_logits, ac_values = ac(batch)
    print(f"ActorCritic - logits: {ac_logits.shape}, values: {ac_values.shape}")


if __name__ == "__main__":
    test_model()
models/train_bc.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models/train_bc.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.optim as optim
5
+ import numpy as np
6
+ import os
7
+ import sys
8
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+ from config import NUM_EPOCHS, LEARNING_RATE, MODEL_DIR, DEVICE
11
+ from models.king_ai import KingAI
12
+ from data.dataset import get_dataloaders
13
+
14
+
15
def _select_device():
    """Pick the fastest available torch device (MPS > CUDA > CPU) and report it."""
    if torch.backends.mps.is_available():
        device = torch.device("mps")
        print("✅ 使用 MPS (Apple Silicon GPU) 加速")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
        print("✅ 使用 CUDA (NVIDIA GPU) 加速")
    else:
        device = torch.device("cpu")
        print("⚠️ 使用 CPU 训练")
    return device


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over *loader*.

    Trains (backprop + optimizer step) when *optimizer* is given,
    otherwise evaluates under no-grad. The original train/val loops were
    near-duplicates; this helper replaces both.

    Returns:
        (mean_loss, accuracy_percent) for the pass.
    """
    training = optimizer is not None
    model.train(training)  # train(False) == eval()
    total_loss, correct, seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for images, actions in loader:
            images, actions = images.to(device), actions.to(device)
            outputs = model(images)
            loss = criterion(outputs, actions)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            seen += actions.size(0)
            correct += (predicted == actions).sum().item()

    return total_loss / len(loader), 100 * correct / seen


def train():
    """Train the behavior-cloning model; save best and final checkpoints."""
    device = _select_device()

    # Load data.
    print("\n加载数据...")
    train_loader, val_loader = get_dataloaders(
        frames_dir="data/frames/game_01",
        annotation_file="data/annotations/annotations.json"
    )

    # Model, loss, optimizer; LR halves every 20 epochs.
    model = KingAI().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    print(f"\n开始训练 {NUM_EPOCHS} 轮...")
    print("=" * 50)

    best_acc = 0.0

    for epoch in range(NUM_EPOCHS):
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)
        scheduler.step()

        print(f"Epoch [{epoch+1:3d}/{NUM_EPOCHS}] "
              f"Train Loss: {train_loss:.4f} "
              f"Train Acc: {train_acc:.2f}% | "
              f"Val Loss: {val_loss:.4f} "
              f"Val Acc: {val_acc:.2f}%")

        # Checkpoint whenever validation accuracy improves.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), os.path.join(MODEL_DIR, "best_model.pth"))
            print(f"  ✅ 保存最佳模型 (准确率: {val_acc:.2f}%)")

    # Always save the last-epoch weights as well.
    torch.save(model.state_dict(), os.path.join(MODEL_DIR, "final_model.pth"))
    print(f"\n🎉 训练完成!最佳验证准确率: {best_acc:.2f}%")
    print(f"模型保存在: {MODEL_DIR}")


if __name__ == "__main__":
    train()