Upload 4 files
Browse files- models/best_model.pth +3 -0
- models/final_model.pth +3 -0
- models/king_ai.py +119 -0
- models/train_bc.py +109 -0
models/best_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:800bf47a5776df6cdcdd45d9a555ba8e0ac7a416467dd0a257871accfe1c0b3a
|
| 3 |
+
size 6765301
|
models/final_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:589f9b776ca64fb3e2725786b520bd26cf7b207d02d05d5148e8fd2ea2565d49
|
| 3 |
+
size 6765317
|
models/king_ai.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models/king_ai.py
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 8 |
+
from config import NUM_ACTIONS, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS
|
| 9 |
+
|
| 10 |
+
class KingAI(nn.Module):
    """
    Behavior-cloning policy network for Honor of Kings gameplay.

    Input:  (batch, IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH) game frames.
    Output: (batch, num_actions) raw action logits (apply softmax for
    probabilities).
    """

    def __init__(self, num_actions=NUM_ACTIONS):
        super().__init__()

        # Convolutional feature extractor (classic DQN-style stack).
        self.conv1 = nn.Conv2d(IMG_CHANNELS, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)

        # Infer the flattened conv-output size with a dummy forward pass
        # so the fc head adapts automatically to the configured image size.
        self._calculate_fc_dim()

        # Fully-connected head.
        self.fc1 = nn.Linear(self.fc_input_dim, 512)
        self.fc2 = nn.Linear(512, num_actions)

        self._initialize_weights()

    def _features(self, x):
        """Shared conv stack; single source of truth for feature extraction.

        Previously duplicated in both ``_calculate_fc_dim`` and ``forward``,
        which risked the two copies drifting apart.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return x

    def _calculate_fc_dim(self):
        """Compute and store the flattened conv-output dimension."""
        with torch.no_grad():
            dummy = torch.zeros(1, IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH)
            self.fc_input_dim = self._features(dummy).view(1, -1).shape[1]

    def _initialize_weights(self):
        """Kaiming (He) init for all conv/linear weights; zero biases."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Map a batch of frames to action logits."""
        x = self._features(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class ActorCritic(nn.Module):
    """
    Actor-Critic network for reinforcement learning.

    Shares a conv + fc feature trunk, then splits into an actor head
    (action logits over ``num_actions``) and a critic head (scalar
    state value).
    """

    def __init__(self, num_actions=NUM_ACTIONS):
        super().__init__()

        self.conv1 = nn.Conv2d(IMG_CHANNELS, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, 3, stride=1)

        # Infer the flattened conv-output size from a dummy forward pass.
        with torch.no_grad():
            dummy = torch.zeros(1, IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH)
            fc_dim = self._features(dummy).view(1, -1).shape[1]

        self.fc_shared = nn.Linear(fc_dim, 512)
        self.actor = nn.Linear(512, num_actions)
        self.critic = nn.Linear(512, 1)

        self._initialize_weights()

    def _features(self, x):
        """Shared conv stack (deduplicated from __init__/forward)."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return x

    def _initialize_weights(self):
        """Kaiming init for weights; zero biases.

        Bias zeroing was previously missing here although the sibling
        KingAI class does it — kept consistent now.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return ``(action_logits, state_value)`` for a batch of frames."""
        x = self._features(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc_shared(x))

        action_logits = self.actor(x)
        value = self.critic(x)

        return action_logits, value
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def test_model():
    """Smoke-test both model classes with one random batch of frames."""
    policy = KingAI()
    frames = torch.randn(4, 3, 84, 84)
    logits_bc = policy(frames)
    print(f"KingAI - 输入: {frames.shape}, 输出: {logits_bc.shape}")
    param_count = sum(p.numel() for p in policy.parameters())
    print(f"参数量: {param_count:,}")

    ac = ActorCritic()
    logits, values = ac(frames)
    print(f"ActorCritic - logits: {logits.shape}, values: {values.shape}")
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# Run a quick shape/parameter smoke test when executed directly.
if __name__ == "__main__":
    test_model()
|
models/train_bc.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models/train_bc.py
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import torch.optim as optim
|
| 5 |
+
import numpy as np
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 9 |
+
|
| 10 |
+
from config import NUM_EPOCHS, LEARNING_RATE, MODEL_DIR, DEVICE
|
| 11 |
+
from models.king_ai import KingAI
|
| 12 |
+
from data.dataset import get_dataloaders
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def train():
    """Train the behavior-cloning model and save best/final checkpoints.

    Side effects: prints progress to stdout and writes
    ``best_model.pth`` / ``final_model.pth`` into MODEL_DIR.
    """
    device = _select_device()

    # Load data.
    print("\n加载数据...")
    train_loader, val_loader = get_dataloaders(
        frames_dir="data/frames/game_01",
        annotation_file="data/annotations/annotations.json"
    )

    # Model, loss, optimizer, LR schedule (halve LR every 20 epochs).
    model = KingAI().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    print(f"\n开始训练 {NUM_EPOCHS} 轮...")
    print("=" * 50)

    # torch.save does not create parent directories — ensure it exists
    # up front instead of crashing on the first checkpoint.
    os.makedirs(MODEL_DIR, exist_ok=True)

    best_acc = 0.0

    for epoch in range(NUM_EPOCHS):
        train_loss, train_acc = _run_train_epoch(
            model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = _run_eval_epoch(
            model, val_loader, criterion, device)

        scheduler.step()

        print(f"Epoch [{epoch+1:3d}/{NUM_EPOCHS}] "
              f"Train Loss: {train_loss:.4f} "
              f"Train Acc: {train_acc:.2f}% | "
              f"Val Loss: {val_loss:.4f} "
              f"Val Acc: {val_acc:.2f}%")

        # Keep the checkpoint with the best validation accuracy.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), os.path.join(MODEL_DIR, "best_model.pth"))
            print(f" ✅ 保存最佳模型 (准确率: {val_acc:.2f}%)")

    # Always save the final weights as well.
    torch.save(model.state_dict(), os.path.join(MODEL_DIR, "final_model.pth"))
    print(f"\n🎉 训练完成!最佳验证准确率: {best_acc:.2f}%")
    print(f"模型保存在: {MODEL_DIR}")


def _select_device():
    """Pick the fastest available device: MPS > CUDA > CPU."""
    if torch.backends.mps.is_available():
        device = torch.device("mps")
        print("✅ 使用 MPS (Apple Silicon GPU) 加速")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
        print("✅ 使用 CUDA (NVIDIA GPU) 加速")
    else:
        device = torch.device("cpu")
        print("⚠️ 使用 CPU 训练")
    return device


def _run_train_epoch(model, loader, criterion, optimizer, device):
    """One optimization pass over ``loader``; returns (mean_loss, accuracy_%)."""
    model.train()
    total_loss, correct, total = 0.0, 0, 0

    for images, actions in loader:
        images, actions = images.to(device), actions.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, actions)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += actions.size(0)
        correct += (predicted == actions).sum().item()

    return total_loss / len(loader), 100 * correct / total


def _run_eval_epoch(model, loader, criterion, device):
    """Gradient-free evaluation pass; returns (mean_loss, accuracy_%)."""
    model.eval()
    total_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for images, actions in loader:
            images, actions = images.to(device), actions.to(device)
            outputs = model(images)
            loss = criterion(outputs, actions)

            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += actions.size(0)
            correct += (predicted == actions).sum().item()

    return total_loss / len(loader), 100 * correct / total
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# Entry point: run behavior-cloning training when executed directly.
if __name__ == "__main__":
    train()
|