clarenceleo committed on
Commit
58d2582
·
verified ·
1 Parent(s): e012066

Upload 4 files

Browse files
models/best_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:800bf47a5776df6cdcdd45d9a555ba8e0ac7a416467dd0a257871accfe1c0b3a
3
+ size 6765301
models/final_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589f9b776ca64fb3e2725786b520bd26cf7b207d02d05d5148e8fd2ea2565d49
3
+ size 6765317
models/king_ai.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models/king_ai.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ import sys
6
+ import os
7
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+ from config import NUM_ACTIONS, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS
9
+
10
class KingAI(nn.Module):
    """Behavior-cloning policy network for Honor of Kings gameplay.

    Input:  (batch, IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH) game frames
            (84x84 RGB by default — per the config constants).
    Output: (batch, num_actions) raw action logits; apply softmax for
            probabilities.
    """

    def __init__(self, num_actions=NUM_ACTIONS):
        super().__init__()

        # Convolutional feature extractor (classic DQN-style stack).
        self.conv1 = nn.Conv2d(IMG_CHANNELS, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)

        # Infer the flattened conv output size with a dummy pass so the
        # FC head adapts automatically to IMG_HEIGHT/IMG_WIDTH changes.
        self._calculate_fc_dim()

        # Fully-connected head producing per-action logits.
        self.fc1 = nn.Linear(self.fc_input_dim, 512)
        self.fc2 = nn.Linear(512, num_actions)

        self._initialize_weights()

    def _conv_features(self, x):
        """Run the conv stack and return flattened (batch, features).

        Shared by `forward` and `_calculate_fc_dim` so the two can never
        drift out of sync.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return x.view(x.size(0), -1)

    def _calculate_fc_dim(self):
        """Compute the conv-stack output dimension for the FC input."""
        with torch.no_grad():
            dummy = torch.zeros(1, IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH)
            self.fc_input_dim = self._conv_features(dummy).shape[1]

    def _initialize_weights(self):
        """Kaiming-normal init for conv/linear weights; zero biases."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Map a batch of frames to action logits."""
        x = self._conv_features(x)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
58
+
59
+
60
class ActorCritic(nn.Module):
    """Actor-critic network for reinforcement learning.

    Shares one conv + FC feature trunk; two heads output action logits
    (actor) and a scalar state-value estimate (critic).
    """

    def __init__(self, num_actions=NUM_ACTIONS):
        super().__init__()

        # Shared convolutional trunk (same layout as KingAI).
        self.conv1 = nn.Conv2d(IMG_CHANNELS, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, 3, stride=1)

        # Infer the flattened conv output size with a dummy forward pass.
        with torch.no_grad():
            dummy = torch.zeros(1, IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH)
            x = F.relu(self.conv1(dummy))
            x = F.relu(self.conv2(x))
            x = F.relu(self.conv3(x))
            fc_dim = x.view(1, -1).shape[1]

        self.fc_shared = nn.Linear(fc_dim, 512)
        self.actor = nn.Linear(512, num_actions)  # action-logits head
        self.critic = nn.Linear(512, 1)           # state-value head

        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal weights and zero biases.

        Bias zeroing was missing here while KingAI zeroes its biases —
        added for consistent initialization across both models.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return (action_logits, value) for a batch of frames."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc_shared(x))

        action_logits = self.actor(x)
        value = self.critic(x)

        return action_logits, value
103
+
104
+
105
def test_model():
    """Smoke-test both model classes on one random batch and print shapes."""
    policy = KingAI()
    batch = torch.randn(4, 3, 84, 84)
    logits = policy(batch)
    print(f"KingAI - 输入: {batch.shape}, 输出: {logits.shape}")
    print(f"参数量: {sum(p.numel() for p in policy.parameters()):,}")

    ac = ActorCritic()
    ac_logits, ac_values = ac(batch)
    print(f"ActorCritic - logits: {ac_logits.shape}, values: {ac_values.shape}")


if __name__ == "__main__":
    test_model()
models/train_bc.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models/train_bc.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.optim as optim
5
+ import numpy as np
6
+ import os
7
+ import sys
8
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+ from config import NUM_EPOCHS, LEARNING_RATE, MODEL_DIR, DEVICE
11
+ from models.king_ai import KingAI
12
+ from data.dataset import get_dataloaders
13
+
14
+
15
def _select_device():
    """Pick the fastest available torch device (MPS > CUDA > CPU) and report it."""
    if torch.backends.mps.is_available():
        device = torch.device("mps")
        print("✅ 使用 MPS (Apple Silicon GPU) 加速")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
        print("✅ 使用 CUDA (NVIDIA GPU) 加速")
    else:
        device = torch.device("cpu")
        print("⚠️ 使用 CPU 训练")
    return device


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over *loader*.

    Trains (backprop + optimizer step) when *optimizer* is given,
    otherwise evaluates under no-grad. The original train/val loops were
    near-duplicates; this helper replaces both.

    Returns:
        (mean_loss, accuracy_percent) for the pass.
    """
    training = optimizer is not None
    model.train(training)  # train(False) == eval()
    total_loss, correct, seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for images, actions in loader:
            images, actions = images.to(device), actions.to(device)
            outputs = model(images)
            loss = criterion(outputs, actions)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            seen += actions.size(0)
            correct += (predicted == actions).sum().item()

    return total_loss / len(loader), 100 * correct / seen


def train():
    """Train the behavior-cloning model; save best and final checkpoints."""
    device = _select_device()

    # Load data.
    print("\n加载数据...")
    train_loader, val_loader = get_dataloaders(
        frames_dir="data/frames/game_01",
        annotation_file="data/annotations/annotations.json"
    )

    # Model, loss, optimizer; LR halves every 20 epochs.
    model = KingAI().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    print(f"\n开始训练 {NUM_EPOCHS} 轮...")
    print("=" * 50)

    best_acc = 0.0

    for epoch in range(NUM_EPOCHS):
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)
        scheduler.step()

        print(f"Epoch [{epoch+1:3d}/{NUM_EPOCHS}] "
              f"Train Loss: {train_loss:.4f} "
              f"Train Acc: {train_acc:.2f}% | "
              f"Val Loss: {val_loss:.4f} "
              f"Val Acc: {val_acc:.2f}%")

        # Checkpoint whenever validation accuracy improves.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), os.path.join(MODEL_DIR, "best_model.pth"))
            print(f"  ✅ 保存最佳模型 (准确率: {val_acc:.2f}%)")

    # Always save the last-epoch weights as well.
    torch.save(model.state_dict(), os.path.join(MODEL_DIR, "final_model.pth"))
    print(f"\n🎉 训练完成!最佳验证准确率: {best_acc:.2f}%")
    print(f"模型保存在: {MODEL_DIR}")


if __name__ == "__main__":
    train()