Compile llama.cpp in Dockerfile for architecture compatibility
- Dockerfile +19 -2
- llm_clients/qwen_translator.py +78 -88
Dockerfile
CHANGED

@@ -3,15 +3,32 @@ FROM python:3.10-slim
 # Set working directory
 WORKDIR /app
 
-# Install system dependencies for PDF processing and other requirements
-# Note: llama.cpp binary is downloaded at runtime, no compilation needed
+# Install system dependencies for PDF processing, llama.cpp compilation, and other requirements
 RUN apt-get update && apt-get install -y \
     gcc \
     g++ \
     git \
     unzip \
+    cmake \
+    make \
+    build-essential \
     && rm -rf /var/lib/apt/lists/*
 
+# Compile llama.cpp from source (for translation feature)
+# This ensures compatibility with the container's architecture
+RUN cd /tmp && \
+    git clone --depth 1 --branch master https://github.com/ggerganov/llama.cpp.git && \
+    cd llama.cpp && \
+    mkdir build && \
+    cd build && \
+    cmake .. -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build . --config Release -j$(nproc) && \
+    cp bin/main /usr/local/bin/llama-main && \
+    chmod 755 /usr/local/bin/llama-main && \
+    cd / && \
+    rm -rf /tmp/llama.cpp && \
+    echo "✅ llama.cpp compiled and installed to /usr/local/bin/llama-main"
+
 # Create a user to avoid running as root
 RUN useradd -m -u 1000 user
 USER user
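A quick way to sanity-check this build step is to run the compiled binary inside the built image and confirm it executes on the container's architecture. The sketch below is illustrative rather than part of the commit: the image tag `qwen-pdf-translator` is a hypothetical placeholder for whatever tag the build uses. One caveat worth noting: recent llama.cpp revisions build the CLI as `bin/llama-cli` rather than `bin/main`, so cloning `master` and copying `bin/main` may fail; pinning a known release tag, or copying `bin/llama-cli`, would be the safer variant.

    import subprocess

    # Hypothetical image tag, e.g. from: docker build -t qwen-pdf-translator .
    IMAGE = "qwen-pdf-translator"

    # Run the compiled binary with --help in a throwaway container; a zero
    # exit status means it loads and executes on this architecture.
    result = subprocess.run(
        ["docker", "run", "--rm", IMAGE, "/usr/local/bin/llama-main", "--help"],
        capture_output=True, text=True, timeout=120,
    )
    print("OK" if result.returncode == 0 else f"failed: {result.stderr[:200]}")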
llm_clients/qwen_translator.py
CHANGED

@@ -50,15 +50,16 @@ class QwenTranslatorClient(LlmClient):
 
     @classmethod
     def _download_binary(cls) -> str:
-        """
-        # Check OS - the
+        """Get the llama.cpp binary - prefer compiled version, fallback to downloaded."""
+        # Check OS - the binary only works on Linux
         if sys.platform == "win32":
             raise RuntimeError(
                 "Translation with llama.cpp binary is not supported on Windows. "
-                "The
+                "The binary is for Linux only. "
                 "Please use this feature on Linux or Hugging Face Spaces."
             )
 
+        # First, check if we have a cached binary path
         if cls._binary_path and os.path.exists(cls._binary_path):
             # Verify it's still executable
             if os.access(cls._binary_path, os.X_OK):

@@ -71,10 +72,24 @@ class QwenTranslatorClient(LlmClient):
                 return cls._binary_path
             except Exception:
                 pass
-            # If we can't fix it,
+            # If we can't fix it, reset
            cls._binary_path = None
 
-
+        # Check for compiled binary (from Dockerfile) first
+        compiled_binary_paths = [
+            "/usr/local/bin/llama-main",      # Standard location from Dockerfile
+            "/app/llama.cpp/build/bin/main",  # Alternative location
+            "/usr/bin/llama-main",            # Another possible location
+        ]
+
+        for compiled_path in compiled_binary_paths:
+            if os.path.exists(compiled_path) and os.access(compiled_path, os.X_OK):
+                cls._binary_path = compiled_path
+                print(f"✅ Using compiled llama.cpp binary at: {compiled_path}", flush=True)
+                return cls._binary_path
+
+        # If no compiled binary found, download pre-built binary as fallback
+        print("📥 No compiled binary found, downloading pre-built llama.cpp binary...", flush=True)
 
         # Create a temporary directory for the binary
         if cls._binary_dir is None:

@@ -97,38 +112,8 @@ class QwenTranslatorClient(LlmClient):
         # If not found, we'll search after extraction
         binary_path = binary_dir / "main"  # Default to 'main' (standard llama.cpp binary name)
 
-        #
-
-        is_linux = sys.platform.startswith('linux')
-
-        # Map architectures to available binaries
-        # Available: ubuntu-x64, ubuntu-s390x, ubuntu-vulkan-x64, macos-arm64, macos-x64, win-cpu-x64, win-cpu-arm64
-        binary_name = "unknown"
-        if is_linux:
-            if machine in ['x86_64', 'amd64']:
-                # Try standard Ubuntu x64 first (most common)
-                zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6996/llama-b6996-bin-ubuntu-x64.zip"
-                binary_name = "ubuntu-x64"
-            elif machine == 's390x':
-                zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6996/llama-b6996-bin-ubuntu-s390x.zip"
-                binary_name = "ubuntu-s390x"
-            elif machine in ['aarch64', 'arm64']:
-                # No ARM Linux binary available, will need to handle this
-                raise RuntimeError(
-                    f"ARM64 Linux architecture detected, but no pre-built ARM64 Linux binary is available. "
-                    f"Available binaries are for x86_64, s390x, macOS ARM64, and Windows. "
-                    f"Translation feature cannot work on ARM64 Linux without compiling from source."
-                )
-            else:
-                # Try x64 as fallback
-                print(f"  ⚠️ Unknown Linux architecture '{machine}', trying x64 binary...", flush=True)
-                zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6996/llama-b6996-bin-ubuntu-x64.zip"
-                binary_name = "ubuntu-x64"
-        else:
-            # Non-Linux systems (shouldn't reach here due to earlier check, but just in case)
-            raise RuntimeError(f"Unsupported platform: {sys.platform}")
-
-        print(f"  Selected binary: {binary_name} for architecture: {machine}", flush=True)
+        # Download the zip file
+        zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6995/llama-b6995-bin-ubuntu-x64.zip"
         zip_path = binary_dir / "llama-binary.zip"
 
         try:

@@ -220,58 +205,63 @@ class QwenTranslatorClient(LlmClient):
             print(f"  ⚠️ Could not set permissions: {e}", flush=True)
 
         # Test if binary can actually run (check architecture compatibility)
-
-
-        print(f"  System architecture: {detected_machine}", flush=True)
+        # Skip test for compiled binaries (they should work)
+        is_compiled = cls._binary_path.startswith("/usr/local/bin") or cls._binary_path.startswith("/app/llama.cpp")
 
-        … (the remaining 48 removed lines of the old compatibility-test block are blank in this diff view and could not be recovered)
+        if not is_compiled:
+            print(f"  🔍 Testing binary compatibility...", flush=True)
+            machine = platform.machine()
+            print(f"  System architecture: {machine}", flush=True)
+
+            try:
+                # Try to run the binary with --help to verify it works
+                test_result = subprocess.run(
+                    [cls._binary_path, "--help"],
+                    capture_output=True,
+                    text=True,
+                    timeout=5
+                )
+                if test_result.returncode == 0 or "usage" in test_result.stdout.lower() or "options" in test_result.stdout.lower():
+                    print(f"  ✅ Binary is compatible and executable", flush=True)
+                else:
+                    print(f"  ⚠️ Binary test returned code {test_result.returncode}", flush=True)
+                    if test_result.stderr:
+                        print(f"  Stderr: {test_result.stderr[:200]}", flush=True)
+            except subprocess.TimeoutExpired:
+                print(f"  ⚠️ Binary test timed out", flush=True)
+            except OSError as os_error:
+                error_msg = str(os_error)
+                errno = getattr(os_error, 'errno', None)
+                if errno == 8 or "Exec format error" in error_msg or "cannot execute" in error_msg.lower():
+                    # Check what the binary actually is using 'file' command if available
+                    file_info = "unknown"
+                    try:
+                        file_result = subprocess.run(
+                            ["file", cls._binary_path],
+                            capture_output=True,
+                            text=True,
+                            timeout=2
+                        )
+                        if file_result.returncode == 0:
+                            file_info = file_result.stdout.strip()
+                    except (subprocess.TimeoutExpired, FileNotFoundError, Exception):
+                        pass
+
+                    raise RuntimeError(
+                        f"Binary architecture mismatch. The downloaded binary is not compatible with this system.\n"
+                        f"System architecture: {machine}\n"
+                        f"Binary info: {file_info}\n"
+                        f"Error: {error_msg}\n"
+                        f"The Ubuntu x64 binary may not be compatible with this system. "
+                        f"Translation feature requires a compatible llama.cpp binary for this architecture."
+                    ) from os_error
+                else:
+                    raise
+            except Exception as test_error:
+                error_msg = str(test_error)
+                print(f"  ⚠️ Binary test warning: {test_error}", flush=True)
+        else:
+            print(f"  ✅ Using compiled binary (no compatibility test needed)", flush=True)
 
         # Clean up zip file
         try:
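Taken together, the patch turns `_download_binary` from "always download a pre-built release" into "prefer the binary compiled into the image, download only as a fallback", and skips the runtime compatibility probe when the compiled binary is used. A minimal sketch of exercising that resolution order, assuming only what the diff shows (the class and classmethod names; the `--help` probe mirrors the patch's own test and is not the project's real invocation):

    import subprocess
    from llm_clients.qwen_translator import QwenTranslatorClient

    # On the Docker image this should resolve to /usr/local/bin/llama-main
    # immediately; elsewhere it falls back to downloading the pre-built
    # Ubuntu x64 release (b6995) into a temporary directory.
    binary = QwenTranslatorClient._download_binary()

    # Probe the resolved binary the same way the patch does.
    out = subprocess.run([binary, "--help"], capture_output=True, text=True, timeout=10)
    print(binary, "->", "ok" if out.returncode == 0 else f"exit {out.returncode}")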