Spaces:

huzpsb
/

test

Runtime error

App Files Files

huzpsb committed on Feb 18

Commit

3b4f893

verified ·

1 Parent(s): 6b22d2a

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -20

app.py CHANGED Viewed

@@ -15,57 +15,77 @@ REPO_ID = "huzpsb/heru"
 FILENAME = "qwq_q4k.gguf"
 def setup_server():
-    """Download and start llama-server."""
     # 1. Download the model
     print(f"[*] Downloading model: {FILENAME}...")
     model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
-    # 2. Download and extract the binary
     if not os.path.exists(BINARY_NAME):
-        print("[*] Downloading llama.cpp binary...")
         response = requests.get(LLAMA_CPP_RELEASE_URL, stream=True)
         with open("llama.tar.gz", "wb") as f:
             f.write(response.content)
         with tarfile.open("llama.tar.gz", "r:gz") as tar:
             for member in tar.getmembers():
-                if member.name.endswith(BINARY_NAME):
-                    member.name = BINARY_NAME
                     tar.extract(member, path=".")
-                    break
-        os.chmod(BINARY_NAME, 0o755)
-    # 3. Start the background process (change: send output straight to sys.stdout/stderr)
-    print("[*] Starting llama-server...")
     cmd = [
         f"./{BINARY_NAME}",
         "-m", model_path,
         "--port", SERVER_PORT,
         "--ctx-size", "8192",
         "--n-gpu-layers", "0",
-        "--host", "127.0.0.1",
-        "--log-disable" # disable llama-server's verbose built-in log format, or enable it as needed
     ]
-    # Use subprocess.DEVNULL or route output directly to the system standard streams so it is easy to view in HF Logs
-    # Note: do not use PIPE unless a thread reads from it; otherwise a full buffer will hang the process
     proc = subprocess.Popen(
         cmd,
-        stdout=sys.stdout,
-        stderr=sys.stderr
     )
     # 4. Health check
     retries = 0
-    while retries < 30:
         try:
-            requests.get(f"http://127.0.0.1:{SERVER_PORT}/health")
-            print("[*] Llama-server is ready!")
-            return proc
         except:
             time.sleep(2)
             retries += 1
-    raise Exception("Server failed to start.")
 # Initialize
 server_process = setup_server()

 FILENAME = "qwq_q4k.gguf"
 def setup_server():
     """Download the model and llama.cpp, start llama-server, and wait for it.

     The llama.cpp release archive is unpacked flat into the current working
     directory, and that directory is put on ``LD_LIBRARY_PATH`` so the server
     binary can load the shared libraries extracted next to it.

     Returns:
         The ``subprocess.Popen`` handle of the running llama-server.

     Raises:
         RuntimeError: If the binary is missing after extraction, or the
             server exits or never reports healthy within the retry budget.
         requests.RequestException: If downloading the release archive fails.
     """
     # 1. Download the model; the returned path is handed to the server via -m.
     print(f"[*] Downloading model: {FILENAME}...")
     model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

     # 2. Download and fully extract the llama.cpp binary package (first run only).
     if not os.path.exists(BINARY_NAME):
         _install_llama_binary()

     # 3. Start the background process.
     print("[*] Starting llama-server with LD_LIBRARY_PATH...")
     # Prepend the current directory to the dynamic-library search path so the
     # extracted .so files are found; keep any inherited value after it.
     new_env = os.environ.copy()
     search_path = [os.getcwd()]
     inherited = new_env.get("LD_LIBRARY_PATH")
     if inherited:
         search_path.append(inherited)
     new_env["LD_LIBRARY_PATH"] = ":".join(search_path)

     cmd = [
         f"./{BINARY_NAME}",
         "-m", model_path,
         "--port", SERVER_PORT,
         "--ctx-size", "8192",
         "--n-gpu-layers", "0",
         "--host", "127.0.0.1",
     ]
     # Inherit our stdout/stderr (not PIPE) so server logs show up directly.
     proc = subprocess.Popen(
         cmd,
         stdout=sys.stdout,
         stderr=sys.stderr,
         env=new_env,
     )

     # 4. Health check. Do not leave an orphaned server behind on failure.
     try:
         _wait_until_healthy(proc)
     except BaseException:
         proc.terminate()
         raise
     return proc


 def _install_llama_binary():
     """Fetch the llama.cpp release archive and unpack it into the cwd."""
     print("[*] Downloading llama.cpp binary package...")
     with requests.get(LLAMA_CPP_RELEASE_URL, stream=True, timeout=60) as response:
         # Fail here on an HTTP error instead of writing an error page to disk
         # and getting an obscure tarfile error later.
         response.raise_for_status()
         with open("llama.tar.gz", "wb") as f:
             # Stream to disk in chunks rather than buffering the whole archive.
             for chunk in response.iter_content(chunk_size=1 << 20):
                 f.write(chunk)

     print("[*] Extracting all files from package...")
     with tarfile.open("llama.tar.gz", "r:gz") as tar:
         for member in tar.getmembers():
             # Only regular files are extracted, and each one is flattened to
             # its base name so everything lands directly in the cwd.
             # NOTE(review): symlink members are skipped by isfile(); confirm
             # the release package does not ship its .so files as symlinks.
             if member.isfile():
                 member.name = os.path.basename(member.name)
                 tar.extract(member, path=".")

     # Give the main program execute permission.
     if not os.path.exists(BINARY_NAME):
         raise RuntimeError(f"Could not find {BINARY_NAME} in the extracted files.")
     os.chmod(BINARY_NAME, 0o755)


 def _wait_until_healthy(proc, retries=60, delay=2):
     """Poll the server's /health endpoint until it answers HTTP 200."""
     print("[*] Waiting for server to respond...")
     url = f"http://127.0.0.1:{SERVER_PORT}/health"
     for attempt in range(1, retries + 1):
         # A dead child will never become healthy; stop waiting immediately.
         if proc.poll() is not None:
             raise RuntimeError(
                 f"llama-server exited with code {proc.returncode}. "
                 "Check logs above for missing .so files."
             )
         try:
             if requests.get(url, timeout=5).status_code == 200:
                 print("[*] Server is ready!")
                 return
         except requests.RequestException:
             # Not accepting connections yet; treated like a non-200 reply.
             pass
         # Back off on every unsuccessful attempt (refused connection *or*
         # non-200 status) so the loop is bounded and never busy-spins.
         time.sleep(delay)
         if attempt % 5 == 0:
             print(f"[*] Still waiting... ({attempt}/{retries})")
     raise RuntimeError("Server failed to start. Check logs above for missing .so files.")
 # Initialize
 server_process = setup_server()