Spaces:
Paused
Paused
#!/usr/bin/env python3
"""
Complete GraphRAG runner for the Google Colab environment.

Solves the problems of starting the Ollama service and running GraphRAG.
Usage:
    1. Enable the GPU runtime in Colab
    2. Copy this file into Colab
    3. Run: !python colab_setup_and_run.py
"""
import os
import sys
import time
import subprocess
import signal
from pathlib import Path

# Opening banner shown as soon as the script starts.
_RULE = "=" * 70
print(_RULE)
print("🚀 GraphRAG Colab 自动化部署脚本")
print(_RULE)
# ============================================================
# 1️⃣ Detect the Colab environment
# ============================================================
def check_colab_environment():
    """Return True when running inside Google Colab, else False.

    Detection relies on importing the ``google.colab`` package,
    which only exists on Colab runtimes; any other environment
    triggers the ImportError path and a printed warning.
    """
    try:
        import google.colab  # noqa: F401 -- succeeds only on Colab
    except ImportError:
        print("\n⚠️ 警告: 未检测到Colab环境")
        print(" 本脚本为Colab优化,在其他环境可能需要调整")
        return False
    print("\n✅ 运行环境: Google Colab")
    return True
# ============================================================
# 2️⃣ Install Ollama
# ============================================================
def install_ollama():
    """Install Ollama inside Colab.

    Returns:
        bool: True when Ollama is already present or was installed
        successfully; False on any download/install failure.
    """
    import shutil

    print("\n" + "="*70)
    print("📦 步骤1: 安装Ollama")
    print("="*70)
    # Already installed?  Check the historical install path and,
    # more robustly, anywhere on PATH (the installer may place the
    # binary in /usr/bin on some images).
    if os.path.exists("/usr/local/bin/ollama") or shutil.which("ollama"):
        print("✅ Ollama已安装")
        return True
    print("\n📥 下载并安装Ollama...")
    try:
        # Fetch the official install script...
        subprocess.run(
            ["curl", "-fsSL", "https://ollama.com/install.sh",
             "-o", "/tmp/install_ollama.sh"],
            check=True,
            capture_output=True,
        )
        # ...then execute it.
        subprocess.run(
            ["sh", "/tmp/install_ollama.sh"],
            check=True,
            capture_output=True,
        )
    except FileNotFoundError as e:
        # BUGFIX: a missing curl/sh binary previously escaped the
        # CalledProcessError handler and crashed the script.
        print(f"❌ Ollama安装失败: {e}")
        return False
    except subprocess.CalledProcessError as e:
        print(f"❌ Ollama安装失败: {e}")
        # Surface the captured stderr so the failure is diagnosable
        # (capture_output=True used to swallow it entirely).
        detail = (e.stderr or b"").decode(errors="replace").strip()
        if detail:
            print(f"   {detail}")
        return False
    print("✅ Ollama安装成功")
    return True
# ============================================================
# 3️⃣ Start the Ollama service in the background
# ============================================================
def start_ollama_service():
    """Start ``ollama serve`` as a detached background process.

    Returns:
        subprocess.Popen | None: the server process handle, or None
        when the process could not be spawned at all.
    """
    print("\n" + "="*70)
    print("🔧 步骤2: 启动Ollama服务")
    print("="*70)
    print("\n🔄 在后台启动Ollama服务...")
    try:
        # start_new_session=True detaches the server into its own
        # session/process group -- the documented, thread-safe
        # replacement for preexec_fn=os.setpgrp.
        ollama_process = subprocess.Popen(
            ["ollama", "serve"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            start_new_session=True,
        )
        # Record the PID immediately so cleanup() can find the server
        # even when the health check below is inconclusive.
        with open("/tmp/ollama.pid", "w") as f:
            f.write(str(ollama_process.pid))
        print("⏳ 等待Ollama服务启动...")
        time.sleep(5)
        # Probe the HTTP API to confirm the server is answering.
        try:
            result = subprocess.run(
                ["curl", "-s", "http://localhost:11434/api/tags"],
                capture_output=True,
                timeout=3,
            )
            if result.returncode == 0:
                print("✅ Ollama服务已启动 (PID: {})".format(ollama_process.pid))
            else:
                print("⚠️ 服务启动可能有问题,继续尝试...")
        except subprocess.TimeoutExpired:
            print("⚠️ 服务检查超时,但进程已启动")
        # BUGFIX: the original fell off the end and returned None when
        # the health check returned nonzero, aborting main() even
        # though the server process had been started.
        return ollama_process
    except Exception as e:
        # Covers FileNotFoundError (ollama binary missing) and any
        # other spawn-time failure.
        print(f"❌ 启动Ollama失败: {e}")
        return None
# ============================================================
# 4️⃣ Download the Mistral model
# ============================================================
def pull_mistral_model():
    """Pull the Mistral model via Ollama; return True on success."""
    print("\n" + "="*70)
    print("📥 步骤3: 下载Mistral模型")
    print("="*70)
    print("\n🔄 拉取mistral模型(这可能需要几分钟)...")
    try:
        # Skip the download when the model already exists locally.
        listing = subprocess.run(
            ["ollama", "list"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if "mistral" in listing.stdout:
            print("✅ Mistral模型已存在")
            return True
        print("📥 开始下载Mistral模型...")
        puller = subprocess.Popen(
            ["ollama", "pull", "mistral"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
        # Stream the pull output so progress is visible live.
        for progress_line in puller.stdout:
            print(f" {progress_line.strip()}")
        puller.wait()
        if puller.returncode != 0:
            print("❌ 模型下载失败")
            return False
        print("✅ Mistral模型下载完成")
        return True
    except Exception as e:
        print(f"❌ 下载Mistral模型失败: {e}")
        return False
# ============================================================
# 5️⃣ Install Python dependencies
# ============================================================

# PyPI distribution name -> importable module name, for packages whose
# import name is NOT a simple "-" -> "_" substitution.
_IMPORT_NAME_OVERRIDES = {
    "beautifulsoup4": "bs4",
    "python-dotenv": "dotenv",
    "tavily-python": "tavily",
    "python-louvain": "community",
}


def install_python_dependencies():
    """Install the Python packages GraphRAG needs.

    Packages that are already importable are skipped; the rest are
    installed quietly via ``pip``. Raises CalledProcessError if a
    pip install fails (check=True), matching the original behavior.
    """
    print("\n" + "="*70)
    print("📦 步骤4: 安装Python依赖")
    print("="*70)
    packages = [
        "langchain",
        "langchain-community",
        "langchain-core",
        "langgraph",
        "langchain-ollama",
        "chromadb",
        "sentence-transformers",
        "tiktoken",
        "beautifulsoup4",
        "requests",
        "tavily-python",
        "python-dotenv",
        "networkx",
        "python-louvain",
        "torch",
        "transformers"
    ]
    print("\n📥 安装必要的Python包...")
    for package in packages:
        # BUGFIX: several PyPI names differ from their import names
        # (beautifulsoup4 -> bs4, python-dotenv -> dotenv, ...), so the
        # old replace("-", "_") probe reinstalled them on every run.
        module_name = _IMPORT_NAME_OVERRIDES.get(
            package, package.replace("-", "_"))
        try:
            __import__(module_name)
            print(f"✅ {package} 已安装")
        except ImportError:
            print(f"📥 安装 {package}...")
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "-q", package],
                check=True
            )
    print("\n✅ 所有依赖安装完成")
# ============================================================
# 6️⃣ Configure environment variables
# ============================================================
def setup_environment():
    """Load .env when present and collect TAVILY_API_KEY interactively.

    Side effects: may set ``os.environ["TAVILY_API_KEY"]`` and loads
    variables from a local .env file via python-dotenv if one exists.
    """
    print("\n" + "="*70)
    print("🔑 步骤5: 配置环境变量")
    print("="*70)
    # Prefer a .env file when the project ships one.
    if not os.path.exists(".env"):
        print("\n⚠️ 未找到.env文件")
    else:
        print("\n✅ 发现.env文件,加载配置...")
        from dotenv import load_dotenv
        load_dotenv()
    # Fall back to prompting for the search API key (hidden input).
    if "TAVILY_API_KEY" not in os.environ:
        from getpass import getpass
        api_key = getpass("请输入TAVILY_API_KEY (或按Enter跳过): ")
        if not api_key:
            print("⚠️ 跳过TAVILY_API_KEY设置(网络搜索功能将不可用)")
        else:
            os.environ["TAVILY_API_KEY"] = api_key
            print("✅ TAVILY_API_KEY已设置")
    print("\n📋 当前环境变量:")
    print(f" TAVILY_API_KEY: {'已设置' if os.environ.get('TAVILY_API_KEY') else '未设置'}")
# ============================================================
# 7️⃣ Run GraphRAG
# ============================================================
def run_graphrag():
    """Launch main_graphrag.py; return True only on a zero exit code."""
    print("\n" + "="*70)
    print("🚀 步骤6: 运行GraphRAG")
    print("="*70)
    # Guard clause: the project files must have been uploaded first.
    if not os.path.exists("main_graphrag.py"):
        print("\n❌ 未找到main_graphrag.py文件")
        print(" 请确保已上传项目文件到Colab")
        return False
    print("\n🔄 启动GraphRAG索引构建...\n")
    try:
        # capture_output=False streams the child's output straight to
        # the notebook in real time.
        result = subprocess.run(
            [sys.executable, "main_graphrag.py"],
            capture_output=False,
            text=True
        )
    except KeyboardInterrupt:
        print("\n⚠️ 用户中断执行")
        return False
    except Exception as e:
        print(f"\n❌ 运行GraphRAG时出错: {e}")
        return False
    if result.returncode == 0:
        print("\n✅ GraphRAG运行成功!")
        return True
    print(f"\n❌ GraphRAG运行失败 (返回码: {result.returncode})")
    return False
# ============================================================
# 8️⃣ Cleanup helper
# ============================================================
def cleanup():
    """Terminate the Ollama server recorded in /tmp/ollama.pid, if any."""
    print("\n" + "="*70)
    print("🧹 清理后台进程")
    print("="*70)
    pid_file = "/tmp/ollama.pid"
    # Nothing to do unless start_ollama_service() recorded a PID.
    if not os.path.exists(pid_file):
        return
    try:
        with open(pid_file, "r") as f:
            pid = int(f.read().strip())
        os.kill(pid, signal.SIGTERM)
        print(f"✅ Ollama服务已停止 (PID: {pid})")
        os.remove(pid_file)
    except Exception as e:
        print(f"⚠️ 停止Ollama服务失败: {e}")
# ============================================================
# Main entry point
# ============================================================
def main():
    """Run the full pipeline: environment check, Ollama install and
    startup, model pull, dependency install, env config, GraphRAG run.
    """
    ollama_process = None
    try:
        # 1. Detect the runtime environment.
        is_colab = check_colab_environment()
        # Each stage below is a hard prerequisite for the next; bail
        # out as soon as one fails.
        if not install_ollama():
            print("\n❌ Ollama安装失败,无法继续")
            return
        ollama_process = start_ollama_service()
        if not ollama_process:
            print("\n❌ Ollama服务启动失败,无法继续")
            return
        if not pull_mistral_model():
            print("\n❌ Mistral模型下载失败,无法继续")
            return
        install_python_dependencies()
        setup_environment()
        # Final stage: run GraphRAG and report the produced artifacts.
        if run_graphrag():
            print("\n" + "="*70)
            print("✅ 所有任务完成!")
            print("="*70)
            print("\n📊 生成的文件:")
            if os.path.exists("data/knowledge_graph.json"):
                print(" ✅ data/knowledge_graph.json")
            # Show how to pull the artifact out of the Colab VM.
            if is_colab:
                print("\n💾 下载结果:")
                print(" from google.colab import files")
                print(" files.download('data/knowledge_graph.json')")
    except KeyboardInterrupt:
        print("\n\n⚠️ 用户中断执行")
    except Exception as e:
        print(f"\n❌ 执行过程中出错: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Deliberately leave the Ollama server running so follow-up
        # notebook cells can keep querying it.
        print("\n⚠️ 注意: Ollama服务仍在后台运行")
        print(" 如需停止: !pkill -f 'ollama serve'")
        print(" 或运行: cleanup()")


if __name__ == "__main__":
    main()