#!/usr/bin/env python3
"""
Google Colab GPU检测和GraphRAG性能测试脚本
可以直接在Colab中运行:python colab_gpu_test.py
"""
import sys
import time
import torch
import numpy as np
from typing import List, Dict
def print_section(title: str):
    """Print *title* framed by 60-character separator rules."""
    rule = "=" * 60
    print(f"\n{rule}\n{title}\n{rule}\n")
def test_gpu_availability() -> bool:
    """Print a summary of the CUDA environment.

    Returns:
        True when a CUDA device is visible to PyTorch, else False.
    """
    print_section("🔍 GPU环境检测")
    cuda_ok = torch.cuda.is_available()
    print(f"✅ CUDA可用: {cuda_ok}")

    # Guard clause: no GPU — print the Colab hint and bail out.
    if not cuda_ok:
        print("\n⚠️ 警告: 未检测到GPU")
        print(" 在Colab中启用GPU: 运行时 → 更改运行时类型 → GPU")
        return False

    print(f" GPU数量: {torch.cuda.device_count()}")
    print(f" 当前GPU: {torch.cuda.current_device()}")
    print(f" GPU名称: {torch.cuda.get_device_name(0)}")
    print(f" CUDA版本: {torch.version.cuda}")
    total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    print(f" 总显存: {total_memory:.2f} GB")
    return True
def benchmark_matrix_multiplication(matrix_size=5000):
    """Time a square matrix multiplication on CPU and (if present) GPU.

    Args:
        matrix_size: Edge length of the random square matrices.

    Returns:
        GPU-over-CPU speedup factor, or 1.0 when no CUDA device exists.
    """
    print_section("⚡ GPU vs CPU 矩阵运算性能测试")
    print(f"矩阵大小: {matrix_size}x{matrix_size}\n")

    # --- CPU timing -------------------------------------------------------
    print("🔵 CPU测试...")
    a_cpu = torch.randn(matrix_size, matrix_size)
    b_cpu = torch.randn(matrix_size, matrix_size)
    # perf_counter is monotonic and higher-resolution than time.time,
    # which can jump backwards under NTP adjustment mid-benchmark.
    start = time.perf_counter()
    torch.mm(a_cpu, b_cpu)
    cpu_time = time.perf_counter() - start
    print(f" CPU时间: {cpu_time:.2f} 秒")

    if not torch.cuda.is_available():
        print("\n⚠️ 跳过GPU测试(GPU不可用)")
        return 1.0

    # --- GPU timing -------------------------------------------------------
    print("\n🟢 GPU测试...")
    a_gpu = torch.randn(matrix_size, matrix_size).cuda()
    b_gpu = torch.randn(matrix_size, matrix_size).cuda()
    # Warm-up launch so one-time CUDA init cost is excluded from the timing.
    torch.mm(a_gpu, b_gpu)
    torch.cuda.synchronize()
    start = time.perf_counter()
    torch.mm(a_gpu, b_gpu)
    # Kernel launches are async; synchronize before reading the clock.
    torch.cuda.synchronize()
    gpu_time = time.perf_counter() - start
    print(f" GPU时间: {gpu_time:.2f} 秒")

    # Guard against a sub-resolution gpu_time producing ZeroDivisionError.
    speedup = cpu_time / gpu_time if gpu_time > 0 else float("inf")
    print(f"\n🚀 加速比: {speedup:.2f}x")
    print(f" GPU比CPU快 {speedup:.1f} 倍!")
    return speedup
def test_text_embedding_performance():
    """Benchmark sentence-embedding throughput on CPU vs GPU.

    Returns:
        GPU-over-CPU speedup, 1.0 when no CUDA device exists, or None
        when sentence-transformers is not installed.
    """
    print_section("📝 文本嵌入性能测试")
    # Keep the try body minimal: only the import can legitimately raise
    # ImportError here; the original wrapped the whole benchmark, which
    # would silently mask an ImportError raised deeper in the work.
    try:
        from sentence_transformers import SentenceTransformer
    except ImportError:
        print("⚠️ sentence-transformers未安装")
        print(" 安装: pip install sentence-transformers")
        return None

    # 4 distinct sentences repeated to 1000 items total.
    test_texts = [
        "Large Language Models are transforming AI",
        "GraphRAG combines knowledge graphs with retrieval",
        "GPU acceleration significantly improves performance",
        "Natural language processing is advancing rapidly",
    ] * 250
    print(f"测试数据: {len(test_texts)} 个文本\n")

    # --- CPU embedding ----------------------------------------------------
    print("🔵 CPU嵌入测试...")
    model_cpu = SentenceTransformer(
        'sentence-transformers/all-MiniLM-L6-v2',
        device='cpu'
    )
    # perf_counter: monotonic clock suited to interval timing.
    start = time.perf_counter()
    model_cpu.encode(test_texts, show_progress_bar=False, batch_size=32)
    cpu_time = time.perf_counter() - start
    print(f" CPU时间: {cpu_time:.2f}秒")
    print(f" 速度: {len(test_texts)/cpu_time:.1f} 文本/秒")

    if not torch.cuda.is_available():
        print("\n⚠️ 跳过GPU测试")
        return 1.0

    # --- GPU embedding ----------------------------------------------------
    print("\n🟢 GPU嵌入测试...")
    model_gpu = SentenceTransformer(
        'sentence-transformers/all-MiniLM-L6-v2',
        device='cuda'
    )
    start = time.perf_counter()
    model_gpu.encode(test_texts, show_progress_bar=False, batch_size=32)
    gpu_time = time.perf_counter() - start
    print(f" GPU时间: {gpu_time:.2f}秒")
    print(f" 速度: {len(test_texts)/gpu_time:.1f} 文本/秒")

    # Avoid ZeroDivisionError when gpu_time rounds to zero.
    speedup = cpu_time / gpu_time if gpu_time > 0 else float("inf")
    print(f"\n🚀 加速比: {speedup:.2f}x")
    print(f" 节省时间: {cpu_time - gpu_time:.2f}秒")
    return speedup
def monitor_gpu_memory():
    """Print a snapshot of CUDA memory usage; no-op without a GPU."""
    if not torch.cuda.is_available():
        return

    print_section("💾 GPU显存使用情况")
    gib = 1024 ** 3  # bytes per GiB, hoisted for the three conversions
    allocated = torch.cuda.memory_allocated(0) / gib
    reserved = torch.cuda.memory_reserved(0) / gib
    total = torch.cuda.get_device_properties(0).total_memory / gib
    print(f"已分配: {allocated:.2f} GB")
    print(f"已保留: {reserved:.2f} GB")
    print(f"总显存: {total:.2f} GB")
    print(f"使用率: {(allocated/total)*100:.1f}%")
def generate_performance_report(matrix_speedup, embedding_speedup):
    """Print a summary report of the benchmark results.

    Args:
        matrix_speedup: GPU/CPU speedup from the matmul benchmark.
        embedding_speedup: Embedding speedup, or None if that test was skipped.
    """
    print_section("📈 性能测试总结报告")
    cuda_ok = torch.cuda.is_available()  # queried once, used in both sections

    print("🖥️ 硬件信息:")
    if cuda_ok:
        vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        print(f" GPU型号: {torch.cuda.get_device_name(0)}")
        print(f" 显存: {vram_gb:.2f} GB")
        print(f" CUDA版本: {torch.version.cuda}")
    else:
        print(" ⚠️ GPU不可用")
    print(f"\n PyTorch版本: {torch.__version__}")
    print(f" Python版本: {sys.version.split()[0]}")

    print("\n⚡ 性能测试结果:")
    print(f" 矩阵运算加速: {matrix_speedup:.2f}x")
    if embedding_speedup:
        print(f" 文本嵌入加速: {embedding_speedup:.2f}x")

    print("\n💡 建议:")
    if not cuda_ok:
        print(" ⚠️ 建议启用GPU以获得最佳性能")
        print(" ⚠️ Colab启用GPU: 运行时 → 更改运行时类型 → GPU")
        return

    print(" ✅ GPU运行良好!")
    print(" ✅ 建议在Colab上运行完整的GraphRAG索引构建")
    print(" ✅ 预计索引构建时间将缩短 3-5 倍")
    if embedding_speedup and embedding_speedup > 1:
        gpu_minutes = 15 / embedding_speedup  # baseline: ~15 min on CPU
        print(f"\n⏱️ 时间节省估算:")
        print(f" 100文档CPU耗时: ~15分钟")
        print(f" 100文档GPU耗时: ~{gpu_minutes:.1f}分钟")
        print(f" 节省: ~{15 - gpu_minutes:.1f}分钟")
def install_dependencies():
    """Install required packages when running inside Google Colab.

    Outside Colab this is a no-op. Each package is probed via its
    *import* name and pip-installed only when that import fails.
    """
    try:
        import google.colab  # noqa: F401 — presence check only
        is_colab = True
    except ImportError:  # narrowed from a bare except, which also swallowed KeyboardInterrupt
        is_colab = False
    if not is_colab:
        return

    print_section("📦 安装依赖")
    print("检测到Colab环境,安装必要的包...\n")
    import subprocess
    # pip name -> import name. The original derived the import name with
    # replace('-', '_'), but python-louvain installs as module `community`,
    # so its check always failed and the package was reinstalled every run.
    packages = {
        'sentence-transformers': 'sentence_transformers',
        'networkx': 'networkx',
        'python-louvain': 'community',
    }
    for package, module_name in packages.items():
        try:
            __import__(module_name)
            print(f"✅ {package} 已安装")
        except ImportError:
            print(f"📥 安装 {package}...")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', package])
            print(f"✅ {package} 安装完成")
def main():
    """Run the full Colab GPU detection and benchmark suite."""
    print("\n" + "="*60)
    print("🚀 Google Colab GPU检测和GraphRAG性能测试")
    print("="*60)

    # Detect whether we are actually running inside Google Colab.
    try:
        import google.colab  # noqa: F401 — presence check only
        print("\n✅ 运行环境: Google Colab")
    except ImportError:  # narrowed from a bare except (which hid KeyboardInterrupt/SystemExit)
        print("\n⚠️ 警告: 未检测到Colab环境")
        print(" 本脚本专为Google Colab设计")

    # Install deps first so the embedding benchmark can find its model lib.
    install_dependencies()

    # 1. GPU detection
    gpu_available = test_gpu_availability()
    # 2. Matrix-multiplication benchmark
    matrix_speedup = benchmark_matrix_multiplication(matrix_size=5000)
    # 3. Text-embedding benchmark (returns None if deps are missing)
    embedding_speedup = test_text_embedding_performance()
    # 4. GPU memory snapshot
    if gpu_available:
        monitor_gpu_memory()
    # 5. Final summary report
    generate_performance_report(matrix_speedup, embedding_speedup)

    print("\n" + "="*60)
    print("✅ 测试完成!")
    print("="*60)
    print("\n📚 下一步:")
    print(" 1. 如果GPU测试成功,可以上传完整的adaptive_RAG项目")
    print(" 2. 运行 main_graphrag.py 进行完整的知识图谱构建")
    print(" 3. 享受GPU带来的3-5倍速度提升!")


if __name__ == "__main__":
    main()