Spaces:
Paused
Paused
File size: 11,549 Bytes
399f3c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 |
#!/usr/bin/env python3
"""
Google Colab环境下的GraphRAG完整运行脚本
解决Ollama服务启动和GraphRAG运行的问题
使用方法:
1. 在Colab中启用GPU
2. 复制此文件到Colab
3. 运行: !python colab_setup_and_run.py
"""
import os
import sys
import time
import subprocess
import signal
from pathlib import Path
# Startup banner: the script title framed by two 70-character rules.
print("\n".join(["=" * 70, "🚀 GraphRAG Colab 自动化部署脚本", "=" * 70]))
# ============================================================
# 1️⃣ 检测Colab环境
# ============================================================
def check_colab_environment():
    """Report whether the script is running inside Google Colab.

    Detection is done by attempting to import ``google.colab``; the outcome
    is printed for the user.

    Returns:
        bool: True when the import succeeds, False when it raises ImportError.
    """
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        in_colab = False
    else:
        in_colab = True

    if in_colab:
        print("\n✅ 运行环境: Google Colab")
    else:
        print("\n⚠️ 警告: 未检测到Colab环境")
        print(" 本脚本为Colab优化,在其他环境可能需要调整")
    return in_colab
# ============================================================
# 2️⃣ 安装Ollama
# ============================================================
def install_ollama():
    """Install Ollama with the official install script unless it is present.

    Ollama counts as installed when an ``ollama`` executable is found on
    PATH or at ``/usr/local/bin/ollama``. Otherwise the install script is
    downloaded with ``curl`` into a temporary file and executed with ``sh``.

    Returns:
        bool: True if Ollama was already available or the installation
        succeeded; False if downloading or running the installer failed.
    """
    import shutil
    import tempfile

    print("\n" + "="*70)
    print("📦 步骤1: 安装Ollama")
    print("="*70)

    # The rest of the script invokes `ollama` through PATH, so look it up the
    # same way; the historical fixed location is still honoured.
    if shutil.which("ollama") or os.path.exists("/usr/local/bin/ollama"):
        print("✅ Ollama已安装")
        return True

    print("\n📥 下载并安装Ollama...")
    script_path = None
    try:
        # A private temporary file avoids a predictable path in /tmp.
        fd, script_path = tempfile.mkstemp(prefix="install_ollama_", suffix=".sh")
        os.close(fd)

        # Download the install script.
        subprocess.run(
            ["curl", "-fsSL", "https://ollama.com/install.sh", "-o", script_path],
            check=True,
            capture_output=True,
        )
        # Run the installer.
        subprocess.run(
            ["sh", script_path],
            check=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"❌ Ollama安装失败: {e}")
        # Output was captured, so surface stderr instead of dropping it.
        detail = (e.stderr or b"").decode("utf-8", errors="replace").strip()
        if detail:
            print(f" {detail}")
        return False
    except OSError as e:
        # Raised when curl/sh is missing or the temporary file cannot be made.
        print(f"❌ Ollama安装失败: {e}")
        return False
    finally:
        if script_path is not None:
            try:
                os.remove(script_path)
            except OSError:
                pass

    print("✅ Ollama安装成功")
    return True
# ============================================================
# 3️⃣ 后台启动Ollama服务
# ============================================================
def _ollama_api_ready():
    """Return True if the local Ollama HTTP API answers on port 11434."""
    try:
        probe = subprocess.run(
            ["curl", "-s", "http://localhost:11434/api/tags"],
            capture_output=True,
            timeout=3,
        )
    except subprocess.TimeoutExpired:
        return False
    return probe.returncode == 0


def _record_ollama_pid(pid):
    """Write *pid* to /tmp/ollama.pid so the service can be stopped later."""
    with open("/tmp/ollama.pid", "w") as f:
        f.write(str(pid))


def start_ollama_service():
    """Launch ``ollama serve`` in the background and wait for its API.

    The server's output is appended to ``/tmp/ollama.log``. After launching,
    the API is probed about once per second for up to 30 seconds.

    Returns:
        subprocess.Popen | None: The handle of the launched process when the
        API answers or the process is still alive after the wait. If the
        launched process exited but the API answers anyway (a server was
        already running), the exited handle is still returned so callers
        that only test truthiness keep working. None is returned when the
        process could not be launched or exited with no API available.
    """
    print("\n" + "="*70)
    print("🔧 步骤2: 启动Ollama服务")
    print("="*70)
    print("\n🔄 在后台启动Ollama服务...")

    try:
        # Send output to a log file: pipes that nobody reads would eventually
        # fill up and block the server. The child keeps its own copy of the
        # descriptor, so the parent can close the file right away.
        with open("/tmp/ollama.log", "ab") as log_file:
            ollama_process = subprocess.Popen(
                ["ollama", "serve"],
                stdout=log_file,
                stderr=subprocess.STDOUT,
                start_new_session=True,  # detach from this script's process group
            )

        print("⏳ 等待Ollama服务启动...")
        deadline = time.monotonic() + 30
        while time.monotonic() < deadline:
            # Sleep first so an immediate startup failure (for example the
            # port already being in use) is visible before the first probe.
            time.sleep(1)
            exited = ollama_process.poll() is not None

            if _ollama_api_ready():
                if exited:
                    # Our process died but something answers: reuse it and
                    # do not record a PID that is no longer alive.
                    print("ℹ️ 检测到已有Ollama服务在运行,直接使用")
                else:
                    _record_ollama_pid(ollama_process.pid)
                    print("✅ Ollama服务已启动 (PID: {})".format(ollama_process.pid))
                return ollama_process

            if exited:
                print(
                    "❌ 启动Ollama失败: 进程已退出 (返回码: {}),详见 /tmp/ollama.log".format(
                        ollama_process.returncode
                    )
                )
                return None

        # The process is alive but the API never answered within the window.
        _record_ollama_pid(ollama_process.pid)
        print("⚠️ 服务检查超时,但进程已启动")
        return ollama_process
    except Exception as e:
        print(f"❌ 启动Ollama失败: {e}")
        return None
# ============================================================
# 4️⃣ 下载Mistral模型
# ============================================================
def pull_mistral_model():
    """Make sure the ``mistral`` model is available locally, pulling if needed.

    ``ollama list`` is consulted first. The model counts as present only when
    a listed model's name (the part of the first column before any ``:tag``)
    is exactly ``mistral``, so names that merely contain the word, such as
    ``dolphin-mistral``, do not suppress the download. Otherwise
    ``ollama pull mistral`` is run and its output echoed line by line.

    Returns:
        bool: True if the model was already listed or the pull exited with
        status 0; False if the pull failed or any step raised.
    """
    print("\n" + "="*70)
    print("📥 步骤3: 下载Mistral模型")
    print("="*70)
    print("\n🔄 拉取mistral模型(这可能需要几分钟)...")

    try:
        # Check whether the model is already installed.
        listing = subprocess.run(
            ["ollama", "list"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        installed = set()
        for row in listing.stdout.splitlines():
            columns = row.split()
            if columns:
                # First column is "name[:tag]"; a header row is harmless here.
                installed.add(columns[0].split(":", 1)[0])

        if "mistral" in installed:
            print("✅ Mistral模型已存在")
            return True

        # Download the model, streaming progress as it arrives.
        print("📥 开始下载Mistral模型...")
        with subprocess.Popen(
            ["ollama", "pull", "mistral"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        ) as process:
            for line in process.stdout:
                print(f" {line.strip()}")
        # Leaving the ``with`` block waits for the process and closes the pipe.

        if process.returncode == 0:
            print("✅ Mistral模型下载完成")
            return True
        print("❌ 模型下载失败")
        return False
    except Exception as e:
        print(f"❌ 下载Mistral模型失败: {e}")
        return False
# ============================================================
# 5️⃣ 安装Python依赖
# ============================================================
def install_python_dependencies(packages=None):
    """Install the Python distributions GraphRAG needs if they are missing.

    Presence is checked through installed-distribution metadata rather than
    by importing a module named after the pip package. Several pip names do
    not match their import names (``beautifulsoup4`` -> ``bs4``,
    ``python-dotenv`` -> ``dotenv``, ...), and importing packages just to
    test for them is slow.

    Args:
        packages: Optional iterable of pip distribution names to ensure.
            Defaults to the built-in GraphRAG dependency list.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` fails for a package.
    """
    from importlib import metadata

    print("\n" + "="*70)
    print("📦 步骤4: 安装Python依赖")
    print("="*70)

    if packages is None:
        packages = [
            "langchain",
            "langchain-community",
            "langchain-core",
            "langgraph",
            "langchain-ollama",
            "chromadb",
            "sentence-transformers",
            "tiktoken",
            "beautifulsoup4",
            "requests",
            "tavily-python",
            "python-dotenv",
            "networkx",
            "python-louvain",
            "torch",
            "transformers",
        ]

    print("\n📥 安装必要的Python包...")
    for package in packages:
        try:
            metadata.version(package)
        except metadata.PackageNotFoundError:
            print(f"📥 安装 {package}...")
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "-q", package],
                check=True,
            )
        else:
            print(f"✅ {package} 已安装")

    print("\n✅ 所有依赖安装完成")
# ============================================================
# 6️⃣ 配置环境变量
# ============================================================
def setup_environment():
    """Load configuration from ``.env`` or prompt for the Tavily API key.

    If a ``.env`` file exists in the current directory it is loaded with
    python-dotenv. Otherwise, when ``TAVILY_API_KEY`` is unset or empty, the
    user is prompted for it without echo; an empty answer, or a closed stdin
    in a non-interactive run, skips the key instead of aborting. Finally the
    function prints whether the key is set.
    """
    print("\n" + "="*70)
    print("🔑 步骤5: 配置环境变量")
    print("="*70)

    if os.path.exists(".env"):
        print("\n✅ 发现.env文件,加载配置...")
        from dotenv import load_dotenv
        load_dotenv()
    else:
        print("\n⚠️ 未找到.env文件")

        # An empty value is as useless as a missing one, so test truthiness.
        if not os.environ.get("TAVILY_API_KEY"):
            from getpass import getpass
            try:
                api_key = getpass("请输入TAVILY_API_KEY (或按Enter跳过): ").strip()
            except EOFError:
                # No input is available; treat this as "skip".
                api_key = ""

            if api_key:
                os.environ["TAVILY_API_KEY"] = api_key
                print("✅ TAVILY_API_KEY已设置")
            else:
                print("⚠️ 跳过TAVILY_API_KEY设置(网络搜索功能将不可用)")

    print("\n📋 当前环境变量:")
    print(f" TAVILY_API_KEY: {'已设置' if os.environ.get('TAVILY_API_KEY') else '未设置'}")
# ============================================================
# 7️⃣ 运行GraphRAG
# ============================================================
def run_graphrag():
    """Run ``main_graphrag.py`` from the current directory as a child process.

    The child inherits this process's stdout/stderr, so its output appears
    live.

    Returns:
        bool: True if the script exists and exits with status 0. False if the
        script is missing, exits non-zero, is interrupted with Ctrl-C, or
        launching it raises.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("🚀 步骤6: 运行GraphRAG")
    print(rule)

    entry_script = "main_graphrag.py"
    succeeded = False

    if os.path.exists(entry_script):
        print("\n🔄 启动GraphRAG索引构建...\n")
        try:
            # Output is deliberately not captured so it streams in real time.
            completed = subprocess.run(
                [sys.executable, entry_script],
                capture_output=False,
                text=True,
            )
            exit_code = completed.returncode
            if exit_code != 0:
                print(f"\n❌ GraphRAG运行失败 (返回码: {exit_code})")
            else:
                print("\n✅ GraphRAG运行成功!")
                succeeded = True
        except KeyboardInterrupt:
            print("\n⚠️ 用户中断执行")
        except Exception as err:
            print(f"\n❌ 运行GraphRAG时出错: {err}")
    else:
        print("\n❌ 未找到main_graphrag.py文件")
        print(" 请确保已上传项目文件到Colab")

    return succeeded
# ============================================================
# 8️⃣ 清理函数
# ============================================================
def cleanup():
    """Stop the Ollama service recorded in ``/tmp/ollama.pid``.

    Reads the PID from the file and sends it SIGTERM. The PID file is removed
    when the signal was delivered, and also when it is stale (no such
    process) or does not contain a valid integer, so one bad file cannot make
    every later call fail. Does nothing beyond printing the header when the
    PID file does not exist. Never raises.
    """
    print("\n" + "="*70)
    print("🧹 清理后台进程")
    print("="*70)

    pid_file = "/tmp/ollama.pid"
    if not os.path.exists(pid_file):
        return

    remove_pid_file = False
    try:
        with open(pid_file, "r") as f:
            pid = int(f.read().strip())
        os.kill(pid, signal.SIGTERM)
        print(f"✅ Ollama服务已停止 (PID: {pid})")
        remove_pid_file = True
    except ProcessLookupError:
        # The recorded process is already gone; the file is stale.
        print(f"ℹ️ Ollama进程已不存在 (PID: {pid})")
        remove_pid_file = True
    except ValueError as e:
        # The file does not hold a PID; keeping it would only repeat this.
        print(f"⚠️ 停止Ollama服务失败: {e}")
        remove_pid_file = True
    except Exception as e:
        # For example a permission error: keep the file for a later retry.
        print(f"⚠️ 停止Ollama服务失败: {e}")

    if remove_pid_file:
        try:
            os.remove(pid_file)
        except OSError as e:
            print(f"⚠️ 停止Ollama服务失败: {e}")
# ============================================================
# 主函数
# ============================================================
def main():
    """Run the whole deployment pipeline in order.

    Steps: detect Colab, install Ollama, start the Ollama service, pull the
    Mistral model, install Python dependencies, configure environment
    variables, then run GraphRAG. A failure in the install, start, or pull
    step prints a message and stops the pipeline. Ctrl-C and unexpected
    exceptions are reported rather than propagated.

    The Ollama service is intentionally left running afterwards; a reminder
    of how to stop it is printed, but only if the service start step
    actually returned a process.
    """
    ollama_process = None

    try:
        # 1. Detect the environment.
        is_colab = check_colab_environment()

        # 2. Install Ollama.
        if not install_ollama():
            print("\n❌ Ollama安装失败,无法继续")
            return

        # 3. Start the Ollama service.
        ollama_process = start_ollama_service()
        if not ollama_process:
            print("\n❌ Ollama服务启动失败,无法继续")
            return

        # 4. Download the model.
        if not pull_mistral_model():
            print("\n❌ Mistral模型下载失败,无法继续")
            return

        # 5. Install Python dependencies.
        install_python_dependencies()

        # 6. Configure the environment.
        setup_environment()

        # 7. Run GraphRAG.
        success = run_graphrag()

        if success:
            print("\n" + "="*70)
            print("✅ 所有任务完成!")
            print("="*70)
            print("\n📊 生成的文件:")
            if os.path.exists("data/knowledge_graph.json"):
                print(" ✅ data/knowledge_graph.json")

            # Show how to download the result from a Colab notebook.
            if is_colab:
                print("\n💾 下载结果:")
                print(" from google.colab import files")
                print(" files.download('data/knowledge_graph.json')")

    except KeyboardInterrupt:
        print("\n\n⚠️ 用户中断执行")
    except Exception as e:
        print(f"\n❌ 执行过程中出错: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Only claim the service is running if the start step produced one;
        # earlier failures leave ollama_process as None.
        if ollama_process is not None:
            print("\n⚠️ 注意: Ollama服务仍在后台运行")
            print(" 如需停止: !pkill -f 'ollama serve'")
            print(" 或运行: cleanup()")
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|