Upload 4 files
Browse files
VGT_Pro_Conv_Logic_Emergence/Logic-Orchestrator.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
import random
|
| 5 |
+
|
| 6 |
+
# --- 1. 模型架构 (保持不变) ---
|
| 7 |
+
class VGTProModel(nn.Module):
    """Convolutional digit adder.

    Embeds two little-endian digit sequences, fuses them into one hidden
    state, then iteratively refines that state with a single shared 3-tap
    convolution applied at growing dilation so carries can propagate.
    """

    def __init__(self, hidden_size=128):
        super().__init__()
        # One embedding vector per decimal digit (0-9).
        self.embedding = nn.Embedding(10, hidden_size)
        # Fuses the concatenated operand embeddings back to hidden_size channels.
        self.reducer = nn.Conv1d(2 * hidden_size, hidden_size, kernel_size=1)
        # Shared kernel; forward() re-applies it with varying dilation.
        self.conv_process = nn.Conv1d(hidden_size, hidden_size, kernel_size=3, padding=1)
        # Per-position projection to 10 digit logits.
        self.output_proj = nn.Conv1d(hidden_size, 10, kernel_size=1)

    def forward(self, x):
        # x: (batch, 2*digits) — operand A's digits then operand B's digits,
        # least-significant digit first.
        half = x.shape[1] // 2
        emb = self.embedding(x).transpose(1, 2)
        fused = torch.cat((emb[:, :, :half], emb[:, :, half:]), dim=1)
        h = torch.relu(self.reducer(fused))
        # One extra position on the right for the final carry-out digit.
        h = F.pad(h, (0, 1))
        # Residual refinement; dilation schedule: 1 (steps 0-3), 2 (4-7), then 4.
        for step in range(h.size(2) + 4):
            if step < 4:
                dil = 1
            elif step < 8:
                dil = 2
            else:
                dil = 4
            update = F.conv1d(h, self.conv_process.weight, self.conv_process.bias,
                              padding=dil, dilation=dil)
            h = h + torch.relu(update)
        # (batch, digits+1, 10): one digit distribution per output position.
        return self.output_proj(h).transpose(1, 2)
|
| 26 |
+
|
| 27 |
+
# --- 2. 基于 VGT-Pro 的逻辑运算单元 ---
|
| 28 |
+
class VGTLogicEngine:
    """Wraps a trained VGTProModel as an arithmetic unit.

    `add` runs the network once; `multiply` is built purely on top of `add`
    via decimal shift-and-add, so multiplication "emerges" from the learned
    adder without any dedicated training.
    """

    def __init__(self, model_path, device):
        self.device = device
        # NOTE(review): torch.load can execute arbitrary code from an
        # untrusted checkpoint — only point this at files you trust.
        checkpoint = torch.load(model_path, map_location=device)
        self.model = VGTProModel().to(device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()

    def add(self, a, b):
        """Core adder: compute a + b with the VGT-Pro network.

        Operands are zero-padded to a common width plus one carry slot and
        fed least-significant digit first, matching the training layout.
        """
        width = max(len(str(a)), len(str(b))) + 1
        digits_a = [int(ch) for ch in reversed(str(a).zfill(width))]
        digits_b = [int(ch) for ch in reversed(str(b).zfill(width))]
        x_in = torch.tensor([digits_a + digits_b], dtype=torch.long).to(self.device)

        with torch.no_grad():
            preds = self.model(x_in)[0].argmax(dim=-1).cpu().tolist()

        # Fold the little-endian digit list back into an integer.
        total = 0
        for place, digit in enumerate(preds):
            total += digit * 10 ** place
        return total

    def multiply(self, a, b):
        """Logical extrapolation: multiplication via repeated model addition."""
        total = 0
        for place, ch in enumerate(reversed(str(b))):
            # a * single digit, realised as that many add() calls.
            partial = 0
            for _ in range(int(ch)):
                partial = self.add(partial, a)
            # Shift left by `place` decimal positions, then accumulate.
            total = self.add(total, partial * (10 ** place))
        return total
|
| 63 |
+
|
| 64 |
+
# --- 3. 测试脚本 ---
|
| 65 |
+
def verify_multiplication(model_path, samples=20, max_digits=4):
    """Benchmark VGT-based multiplication against Python's exact arithmetic.

    Draws `samples` random pairs of up to `max_digits`-digit integers,
    prints one row per case, then the overall exact-match accuracy.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    engine = VGTLogicEngine(model_path, device)

    print(f"✅ 加载逻辑引擎,开始【逻辑外推乘法】测试...")
    print(f"{'题目':<25} | {'预期结果':<15} | {'模型结果':<15} | {'状态'}")
    print("-" * 80)

    hits = 0
    for _ in range(samples):
        # Products grow fast: 4-digit operands can yield 8-digit results.
        lhs = random.randint(1, 10 ** max_digits - 1)
        rhs = random.randint(1, 10 ** max_digits - 1)

        expected = lhs * rhs
        predicted = engine.multiply(lhs, rhs)

        ok = expected == predicted
        if ok:
            hits += 1
        status = "✅" if ok else "❌"

        print(f"{f'{lhs} x {rhs}':<25} | {expected:<15} | {predicted:<15} | {status}")

    print("-" * 80)
    print(f"测试完成!准确率: {(hits/samples)*100:.2f}%")
|
| 89 |
+
|
| 90 |
+
if __name__ == "__main__":
    # Checkpoint produced by the training script.
    checkpoint_path = "vgt_pro_logic_machine.pth"
    # 4-digit multiplication drives dozens of high-precision add() calls.
    verify_multiplication(checkpoint_path, samples=20, max_digits=4)
|
VGT_Pro_Conv_Logic_Emergence/vgt_vs_base_benchmark.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import torch.optim as optim
|
| 5 |
+
import random
|
| 6 |
+
import pandas as pd # 用于生成清晰的对比表格
|
| 7 |
+
|
| 8 |
+
# Run on GPU when available, otherwise CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Experiment hyperparameters (aligned with the paper) ---
MAX_DIGITS = 6        # digits per operand during training
HIDDEN_SIZE = 128     # embedding / channel width of the model
LR = 3e-4             # AdamW learning rate
TRAIN_STEPS = 10000   # optimisation steps per model
BATCH_SIZE = 64       # samples per training batch
|
| 16 |
+
|
| 17 |
+
# --- 1. 定义模型架构 ---
|
| 18 |
+
class AdditionModel(nn.Module):
    """Convolutional multi-digit adder shared by the Base and VGT runs.

    forward() returns both the digit logits and the final hidden state so
    the VGT training mode can put an L2 penalty on the latter.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(10, hidden_size)  # digit -> vector
        self.reducer = nn.Conv1d(2 * hidden_size, hidden_size, kernel_size=1)
        self.conv_process = nn.Conv1d(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.output_proj = nn.Conv1d(hidden_size, 10, kernel_size=1)

    def forward(self, x):
        # x: (batch, 2*digits) little-endian digit codes for both operands.
        n_digits = x.shape[1] // 2
        emb = self.embedding(x).transpose(1, 2)
        lhs, rhs = emb[:, :, :n_digits], emb[:, :, n_digits:]
        h = torch.relu(self.reducer(torch.cat([lhs, rhs], dim=1)))
        h = nn.functional.pad(h, (0, 1))  # extra slot for the carry-out digit
        # Recurrent refinement: one residual conv pass per output position.
        for _ in range(h.size(2)):
            h = torch.relu(self.conv_process(h)) + h
        return self.output_proj(h).transpose(1, 2), h
|
| 36 |
+
|
| 37 |
+
# --- 2. 训练函数 (通过 vgt_mode 参数控制变量) ---
|
| 38 |
+
def train_model(vgt_mode=True):
    """Train one AdditionModel and return it.

    vgt_mode=True adds the paper's geometric L2 pressure on the hidden
    state (with a linearly decaying alpha schedule); vgt_mode=False is the
    unconstrained baseline. Uses the module-level hyperparameters.
    """
    model = AdditionModel(HIDDEN_SIZE).to(DEVICE)
    optimizer = optim.AdamW(model.parameters(), lr=LR)

    mode_name = "VGT (With L2 Pressure)" if vgt_mode else "Base (No Constraint)"
    print(f"\n--- Training {mode_name} ---")

    for step in range(TRAIN_STEPS + 1):
        model.train()
        x, y = generate_batch(BATCH_SIZE)
        optimizer.zero_grad()
        logits, h_states = model(x)

        # Plain cross-entropy over the 10 digit classes at every position.
        ce = nn.functional.cross_entropy(logits.reshape(-1, 10), y.reshape(-1))

        if not vgt_mode:
            loss = ce  # baseline: no extra constraint
        else:
            # Geometric constraint: alpha decays linearly 50 -> 1 over training.
            alpha = max(1.0, 50.0 - (50.0 - 1.0) * (step / TRAIN_STEPS))
            # Paper's core term: L2 penalty on the hidden-state norm.
            l2_pressure = torch.norm(h_states, p=2, dim=1).mean()
            loss = ce + alpha * 1e-4 * l2_pressure

        loss.backward()
        optimizer.step()

        if step % 2000 == 0:
            head_std = model.output_proj.weight.std().item()
            print(f"Step {step:5d} | CE Loss: {ce.item():.4f} | Head Weight Std: {head_std:.4f}")

    return model
|
| 71 |
+
|
| 72 |
+
# --- 3. 生成数据与评估 ---
|
| 73 |
+
def generate_batch(batch_size, digits=MAX_DIGITS):
    """Build one random addition batch on DEVICE.

    Returns (x, y): x holds both operands' digits (little-endian, operand A
    then operand B), y the sum's digits with one extra carry position.
    """
    xs, ys = [], []
    for _ in range(batch_size):
        a = random.randint(0, 10 ** digits - 1)
        b = random.randint(0, 10 ** digits - 1)
        total = a + b
        xs.append([int(ch) for ch in reversed(str(a).zfill(digits))]
                  + [int(ch) for ch in reversed(str(b).zfill(digits))])
        ys.append([int(ch) for ch in reversed(str(total).zfill(digits + 1))])
    return (torch.tensor(xs, dtype=torch.long).to(DEVICE),
            torch.tensor(ys, dtype=torch.long).to(DEVICE))
|
| 83 |
+
|
| 84 |
+
def evaluate(model, digits):
    """Exact-match accuracy (%) over 500 random `digits`-digit additions."""
    model.eval()
    hits = 0
    trials = 500
    with torch.no_grad():
        for _ in range(trials):
            a = random.randint(0, 10 ** digits - 1)
            b = random.randint(0, 10 ** digits - 1)
            # Same little-endian, zero-padded layout as training batches.
            x_in = torch.tensor(
                [[int(ch) for ch in reversed(str(a).zfill(digits))]
                 + [int(ch) for ch in reversed(str(b).zfill(digits))]],
                dtype=torch.long).to(DEVICE)
            logits, _ = model(x_in)
            digit_preds = logits[0].argmax(dim=-1).cpu().tolist()
            predicted = sum(d * (10 ** i) for i, d in enumerate(digit_preds))
            if predicted == a + b:
                hits += 1
    return (hits / trials) * 100
|
| 100 |
+
|
| 101 |
+
# --- 4. 执行对比实验 ---
|
| 102 |
+
if __name__ == "__main__":
    # Control group (plain CE) vs experimental group (geometric pressure).
    base_model = train_model(vgt_mode=False)
    vgt_model = train_model(vgt_mode=True)

    # Length-generalisation sweep: in-distribution (6) and extrapolation (12, 20).
    rows = []
    for n_digits in [6, 12, 20]:
        rows.append({
            "Digits": n_digits,
            "Base Accuracy (%)": evaluate(base_model, n_digits),
            "VGT Accuracy (%)": evaluate(vgt_model, n_digits),
        })

    # Render the comparison as a table.
    banner = "=" * 50
    print("\n" + banner)
    print("FINAL COMPARISON: BASE vs VGT (GEOMETRIC PRESSURE)")
    print(banner)
    print(pd.DataFrame(rows).to_string(index=False))
    print(banner)
|