PregoPal / start_services.py
J.B-Lin
Stabilize Gradio duplex voice flow
ffe2c9f
Raw
History Blame Contribute Delete
6.12 kB
"""
Launch the PregoPal full-duplex services
========================================
Startup order:
1. start_llama_server() — start llama.cpp-omni's llama-server (with omni support)
2. start_api_server() — start the PregoAPI backend
3. start_gradio() — start the PregoPal Gradio frontend
Usage:
python start_services.py # start all services
python start_services.py --llama # start only llama-server
python start_services.py --api # start only the API
python start_services.py --gradio # start only Gradio
"""
import os
import sys
import time
import json
import subprocess
import logging
import requests as req
from pathlib import Path
# Root logging setup: every record is printed as "[logger-name] message".
logging.basicConfig(
    format="[%(name)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("prego_starter")

# Directory that contains this script; sibling scripts are resolved against it.
BASE_DIR = Path(__file__).parent

# Location of the llama-server binary and of the model files it loads.
# NOTE(review): these are absolute paths on one specific Windows machine.
LLAMA_SERVER_EXE = r"C:\Users\Andre\codes\LJB\llama.cpp-omni\build\bin\Release\llama-server.exe"
MODEL_DIR = r"C:\Users\Andre\codes\LJB\llama.cpp-omni\models"
MAIN_MODEL = os.path.join(MODEL_DIR, "MiniCPM-o-4_5-Q4_K_M.gguf")
VISION_PROJ = os.path.join(MODEL_DIR, "vision", "MiniCPM-o-4_5-vision-F16.gguf")

# Local TCP ports used by the three services.
LLAMA_PORT = 8081   # llama-server (passed to it via --port)
API_PORT = 8090     # PregoAPI backend (only probed here; the backend picks its own port)
GRADIO_PORT = 7889  # Gradio frontend (only used for the "please visit" log line)
def wait_for_server(url: str, timeout: int = 120, interval: float = 1.0) -> bool:
    """Poll *url* until it answers HTTP 200 or *timeout* seconds have passed.

    Args:
        url: Health-check URL to GET.
        timeout: Maximum total time to keep polling, in seconds.
        interval: Pause between two consecutive probes, in seconds.

    Returns:
        True as soon as a probe returns status 200, False if the deadline
        passes first. Both outcomes are logged.
    """
    # monotonic() cannot jump when the system clock is adjusted, so the
    # timeout is honoured even across NTP corrections or manual clock changes.
    start = time.monotonic()
    while time.monotonic() - start < timeout:
        try:
            r = req.get(url, timeout=2)
        except req.exceptions.RequestException:
            # Server is not accepting connections yet (or the probe timed out).
            # Only request failures are ignored: Ctrl+C and SystemExit must
            # propagate so the user can abort a long startup wait.
            pass
        else:
            if r.status_code == 200:
                elapsed = time.monotonic() - start
                logger.info(f"✅ 服务就绪 ({elapsed:.0f}s): {url}")
                return True
        time.sleep(interval)
    logger.error(f"⛔ 服务启动超时 ({timeout}s): {url}")
    return False
def start_llama_server() -> subprocess.Popen:
    """Launch llama-server (omni mode) and wait until its health check passes.

    The server is started with MAIN_MODEL and the VISION_PROJ projector, bound
    to 127.0.0.1:LLAMA_PORT. Its stdout and stderr are merged into one text
    pipe available as ``proc.stdout`` on the returned handle.

    Returns:
        The ``subprocess.Popen`` handle of the running server, once
        ``/health`` has answered with HTTP 200.

    Raises:
        RuntimeError: if the health endpoint is not ready within 180 seconds.
    """
    logger.info("启动 llama-server...")
    cmd = [
        LLAMA_SERVER_EXE,
        "-m", MAIN_MODEL,
        "--mmproj", VISION_PROJ,
        "-c", "8192",
        "--temp", "0.7",
        "--repeat-penalty", "1.05",
        "--host", "127.0.0.1",
        "--port", str(LLAMA_PORT),
        "-ngl", "99",
    ]
    # NOTE(review): the output pipe is not read while we wait below; a child
    # that writes more than the OS pipe buffer holds may stall until the
    # caller starts draining proc.stdout.
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
        encoding="utf-8",
        errors="replace",
    )

    ready = False
    try:
        ready = wait_for_server(f"http://127.0.0.1:{LLAMA_PORT}/health", timeout=180)
    finally:
        if not ready:
            # Reached on timeout AND when the wait is interrupted (e.g. Ctrl+C).
            # The caller never receives the handle in either case, so the
            # child must be stopped and reaped here or it would be orphaned.
            proc.kill()
            try:
                captured, _ = proc.communicate(timeout=5)
            except subprocess.TimeoutExpired:
                captured = ""
            # Show the last lines the server printed; without them a failed
            # start gives no hint about the cause.
            for line in (captured or "").splitlines()[-20:]:
                logger.error("[llama-server] %s", line)

    if not ready:
        raise RuntimeError("llama-server 启动失败")
    return proc
def start_api_server() -> subprocess.Popen:
    """Launch the PregoAPI backend and wait until its health check passes.

    Runs ``api/go_server.py`` (relative to BASE_DIR) with a copy of the
    current environment plus:

    * ``LLAMA_SERVER_URL`` — base URL of the local llama-server
    * ``OMNI_OUTPUT_DIR`` — ``omni_output`` directory next to this script

    Stdout and stderr are merged into one text pipe available as
    ``proc.stdout`` on the returned handle.

    Returns:
        The ``subprocess.Popen`` handle of the running backend, once
        ``http://127.0.0.1:API_PORT/health`` has answered with HTTP 200.

    Raises:
        RuntimeError: if the health endpoint is not ready within 30 seconds.
    """
    logger.info("启动 PregoAPI 后端...")
    # NOTE(review): interpreter path is specific to one machine/conda env.
    python = r"C:\Users\Andre\miniconda3\envs\trader_stable\python.exe"
    api_script = str(BASE_DIR / "api" / "go_server.py")

    # Environment handed to the backend
    env = os.environ.copy()
    env["LLAMA_SERVER_URL"] = f"http://127.0.0.1:{LLAMA_PORT}"
    env["OMNI_OUTPUT_DIR"] = str(BASE_DIR / "omni_output")

    # NOTE(review): the output pipe is not read while we wait below; a child
    # that writes more than the OS pipe buffer holds may stall until the
    # caller starts draining proc.stdout.
    proc = subprocess.Popen(
        [python, api_script],
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
        encoding="utf-8",
        errors="replace",
    )

    ready = False
    try:
        ready = wait_for_server(f"http://127.0.0.1:{API_PORT}/health", timeout=30)
    finally:
        if not ready:
            # Reached on timeout AND when the wait is interrupted (e.g. Ctrl+C).
            # The caller never receives the handle in either case, so the
            # child must be stopped and reaped here or it would be orphaned.
            proc.kill()
            try:
                captured, _ = proc.communicate(timeout=5)
            except subprocess.TimeoutExpired:
                captured = ""
            # Show the last lines the backend printed; without them a failed
            # start gives no hint about the cause.
            for line in (captured or "").splitlines()[-20:]:
                logger.error("[PregoAPI] %s", line)

    if not ready:
        raise RuntimeError("API 服务启动失败")
    return proc
def start_gradio() -> subprocess.Popen:
    """Spawn the Gradio frontend (the existing app.py) and return immediately.

    The child inherits the current environment plus ``MINICPM_API_BASE``,
    which points the app at the local API on API_PORT. Stdout and stderr are
    merged into one text pipe available as ``proc.stdout``. No readiness
    check is performed.

    Returns:
        The ``subprocess.Popen`` handle of the spawned frontend.
    """
    logger.info("启动 Gradio 前端...")
    interpreter = r"C:\Users\Andre\miniconda3\envs\trader_stable\python.exe"
    # Tell the app to use the local API
    child_env = dict(os.environ, MINICPM_API_BASE=f"http://127.0.0.1:{API_PORT}")
    return subprocess.Popen(
        [interpreter, str(BASE_DIR / "app.py")],
        env=child_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
        encoding="utf-8",
        errors="replace",
    )
def _log_banner(title: str) -> None:
    """Log *title* framed by two separator lines."""
    logger.info("=" * 50)
    logger.info(title)
    logger.info("=" * 50)


def _pump_output(name: str, stream) -> None:
    """Forward every line of a child's output to stdout, prefixed with its name."""
    try:
        for line in stream:
            text = f"[{name}] {line.rstrip()}"
            try:
                # One write per line so lines from different pump threads
                # cannot be interleaved mid-line.
                sys.stdout.write(text + "\n")
            except UnicodeEncodeError:
                # The console encoding cannot represent a character; keep
                # forwarding with escapes instead of dropping the stream.
                sys.stdout.write(
                    text.encode("ascii", "backslashreplace").decode("ascii") + "\n"
                )
    except (OSError, ValueError):
        # The pipe was closed underneath us during shutdown; nothing left to forward.
        pass


if __name__ == "__main__":
    import argparse
    import threading

    parser = argparse.ArgumentParser(description="PregoPal 服务管理器")
    parser.add_argument("--llama", action="store_true", help="只启动 llama-server")
    parser.add_argument("--api", action="store_true", help="只启动 API 后端")
    parser.add_argument("--gradio", action="store_true", help="只启动 Gradio 前端")
    args = parser.parse_args()

    # A service is started when its flag is given, or when no flag at all is
    # given (in which case everything starts).
    should_llama = args.llama or not (args.api or args.gradio)
    should_api = args.api or not (args.llama or args.gradio)
    should_gradio = args.gradio or not (args.llama or args.api)

    processes = []
    try:
        if should_llama:
            _log_banner("1️⃣ 启动 llama-server...")
            llama_proc = start_llama_server()
            processes.append(("llama-server", llama_proc))
        if should_api:
            _log_banner("2️⃣ 启动 PregoAPI 后端...")
            api_proc = start_api_server()
            processes.append(("PregoAPI", api_proc))
        if should_gradio:
            _log_banner("3️⃣ 启动 Gradio 前端...")
            gradio_proc = start_gradio()
            processes.append(("Gradio", gradio_proc))
            logger.info(f"🌐 请访问: http://127.0.0.1:{GRADIO_PORT}")
        _log_banner("✅ 所有服务启动完成! 按 Ctrl+C 停止")

        # Keep running. Each child gets its own reader thread: reading the
        # pipes one after another would show only the first service's output
        # until it exits, and the other children could block on a full pipe.
        pumps = [
            threading.Thread(target=_pump_output, args=(name, proc.stdout), daemon=True)
            for name, proc in processes
        ]
        for pump in pumps:
            pump.start()
        # Join with a timeout so the main thread wakes up regularly and a
        # pending Ctrl+C is delivered promptly on every platform.
        for pump in pumps:
            while pump.is_alive():
                pump.join(timeout=0.5)
    except KeyboardInterrupt:
        logger.info("\n正在停止服务...")
    finally:
        # Stop in reverse start order so a dependent service goes down before
        # the service it talks to.
        for name, proc in reversed(processes):
            logger.info(f"停止 {name}...")
            proc.terminate()
            try:
                proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # Did not exit gracefully in time: force it and reap.
                proc.kill()
                proc.wait()
        logger.info("所有服务已停止")