huzpsb commited on
Commit
3b4f893
·
verified ·
1 Parent(s): 6b22d2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -20
app.py CHANGED
@@ -15,57 +15,77 @@ REPO_ID = "huzpsb/heru"
15
  FILENAME = "qwq_q4k.gguf"
16
 
17
def setup_server():
    """Download the GGUF model and the llama.cpp binary, launch llama-server,
    and block until it answers its health endpoint.

    Returns:
        subprocess.Popen: handle of the running llama-server process.

    Raises:
        Exception: if the binary is missing from the archive or the server
            never becomes healthy within the retry budget.
    """
    # 1. Download the model from the Hugging Face Hub (cached by hf_hub_download).
    print(f"[*] Downloading model: {FILENAME}...")
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

    # 2. Download and extract the llama.cpp binary (skipped if already present).
    if not os.path.exists(BINARY_NAME):
        print("[*] Downloading llama.cpp binary...")
        # Stream the archive to disk in chunks: the original buffered the whole
        # response in memory via response.content, defeating stream=True.
        with requests.get(LLAMA_CPP_RELEASE_URL, stream=True, timeout=60) as response:
            response.raise_for_status()  # fail fast on a 4xx/5xx download URL
            with open("llama.tar.gz", "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

        extracted = False
        with tarfile.open("llama.tar.gz", "r:gz") as tar:
            for member in tar.getmembers():
                if member.name.endswith(BINARY_NAME):
                    # Strip the archive's directory prefix so the binary
                    # lands directly in the current working directory.
                    member.name = BINARY_NAME
                    tar.extract(member, path=".")
                    extracted = True
                    break
        if not extracted:
            # Previously os.chmod raised an opaque FileNotFoundError here.
            raise Exception(f"Could not find {BINARY_NAME} in the downloaded archive.")
        os.chmod(BINARY_NAME, 0o755)

    # 3. Start the server as a background process; route its output to this
    # process's stdout/stderr so it shows up in the HF Space logs. Do NOT use
    # PIPE without a reader thread — a full pipe buffer would deadlock the child.
    print("[*] Starting llama-server...")
    cmd = [
        f"./{BINARY_NAME}",
        "-m", model_path,
        "--port", SERVER_PORT,
        "--ctx-size", "8192",
        "--n-gpu-layers", "0",
        "--host", "127.0.0.1",
        "--log-disable"  # suppress llama-server's verbose log format
    ]
    proc = subprocess.Popen(
        cmd,
        stdout=sys.stdout,
        stderr=sys.stderr
    )

    # 4. Health check: poll until the server answers 200 or retries run out.
    retries = 0
    while retries < 30:
        try:
            r = requests.get(f"http://127.0.0.1:{SERVER_PORT}/health", timeout=5)
            # Only a 200 means "ready" — the original accepted any response,
            # including 503 "loading model".
            if r.status_code == 200:
                print("[*] Llama-server is ready!")
                return proc
        except requests.RequestException:
            # Server not accepting connections yet; fall through to retry.
            pass
        time.sleep(2)
        retries += 1
    raise Exception("Server failed to start.")


# Initialize at import time so the server is up before the app serves requests.
server_process = setup_server()
 
15
  FILENAME = "qwq_q4k.gguf"
16
 
17
def setup_server():
    """Download the GGUF model and the full llama.cpp release package
    (binary + shared libraries), launch llama-server with LD_LIBRARY_PATH
    pointing at the extraction directory, and block until it is healthy.

    Returns:
        subprocess.Popen: handle of the running llama-server process.

    Raises:
        Exception: if the binary is missing after extraction or the server
            never answers 200 on /health within the retry budget.
    """
    # 1. Download the model from the Hugging Face Hub (cached by hf_hub_download).
    print(f"[*] Downloading model: {FILENAME}...")
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

    # 2. Download and fully extract the llama.cpp package (skipped if present).
    if not os.path.exists(BINARY_NAME):
        print("[*] Downloading llama.cpp binary package...")
        # Stream the archive to disk in chunks: the original buffered the whole
        # response in memory via response.content, defeating stream=True.
        with requests.get(LLAMA_CPP_RELEASE_URL, stream=True, timeout=60) as response:
            response.raise_for_status()  # fail fast on a 4xx/5xx download URL
            with open("llama.tar.gz", "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

        print("[*] Extracting all files from package...")
        with tarfile.open("llama.tar.gz", "r:gz") as tar:
            for member in tar.getmembers():
                if member.isfile():
                    # Flatten the archive layout: drop directory prefixes so the
                    # binary and its .so libraries all land in the CWD. basename()
                    # also neutralizes any path-traversal names in the tarball.
                    member.name = os.path.basename(member.name)
                    tar.extract(member, path=".")

        # Make the main binary executable, or fail loudly if it never appeared.
        if os.path.exists(BINARY_NAME):
            os.chmod(BINARY_NAME, 0o755)
        else:
            raise Exception(f"Could not find {BINARY_NAME} in the extracted files.")

    # 3. Start the server as a background process.
    print("[*] Starting llama-server with LD_LIBRARY_PATH...")

    # Key point: put the current directory on the dynamic-library search path
    # so the loader finds the .so files extracted next to the binary.
    new_env = os.environ.copy()
    current_dir = os.getcwd()
    new_env["LD_LIBRARY_PATH"] = f"{current_dir}:{new_env.get('LD_LIBRARY_PATH', '')}"

    cmd = [
        f"./{BINARY_NAME}",
        "-m", model_path,
        "--port", SERVER_PORT,
        "--ctx-size", "8192",
        "--n-gpu-layers", "0",
        "--host", "127.0.0.1"
    ]

    # Route child output to this process's stdout/stderr (visible in HF logs);
    # PIPE without a reader thread would deadlock once the buffer fills.
    proc = subprocess.Popen(
        cmd,
        stdout=sys.stdout,
        stderr=sys.stderr,
        env=new_env
    )

    # 4. Health check: poll until the server answers 200 or retries run out.
    print("[*] Waiting for server to respond...")
    retries = 0
    while retries < 60:  # generous budget: library loading can be slow
        try:
            r = requests.get(f"http://127.0.0.1:{SERVER_PORT}/health", timeout=5)
            if r.status_code == 200:
                print("[*] Server is ready!")
                return proc
        except requests.RequestException:
            # Server not accepting connections yet; fall through to retry.
            pass
        # BUG FIX: sleep and count on EVERY failed attempt. The original only
        # did so inside `except`, so a non-200 reply (e.g. 503 while loading)
        # spun this loop forever at full CPU with no backoff and no timeout.
        time.sleep(2)
        retries += 1
        if retries % 5 == 0:
            print(f"[*] Still waiting... ({retries}/60)")

    raise Exception("Server failed to start. Check logs above for missing .so files.")


# Initialize at import time so the server is up before the app serves requests.
server_process = setup_server()