zazaman committed on
Commit
b880cfc
·
1 Parent(s): d2cecb3

Compile llama.cpp in Dockerfile for architecture compatibility

Browse files
Files changed (2) hide show
  1. Dockerfile +19 -2
  2. llm_clients/qwen_translator.py +78 -88
Dockerfile CHANGED
@@ -3,15 +3,32 @@ FROM python:3.10-slim
3
  # Set working directory
4
  WORKDIR /app
5
 
6
- # Install system dependencies for PDF processing and other requirements
7
- # Note: llama.cpp binary is downloaded at runtime, no compilation needed
8
  RUN apt-get update && apt-get install -y \
9
  gcc \
10
  g++ \
11
  git \
12
  unzip \
 
 
 
13
  && rm -rf /var/lib/apt/lists/*
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # Create a user to avoid running as root
16
  RUN useradd -m -u 1000 user
17
  USER user
 
3
  # Set working directory
4
  WORKDIR /app
5
 
6
+ # Install system dependencies for PDF processing, llama.cpp compilation, and other requirements
 
7
  RUN apt-get update && apt-get install -y \
8
  gcc \
9
  g++ \
10
  git \
11
  unzip \
12
+ cmake \
13
+ make \
14
+ build-essential \
15
  && rm -rf /var/lib/apt/lists/*
16
 
17
+ # Compile llama.cpp from source (for translation feature)
18
+ # This ensures compatibility with the container's architecture
19
+ RUN cd /tmp && \
20
+ git clone --depth 1 --branch master https://github.com/ggerganov/llama.cpp.git && \
21
+ cd llama.cpp && \
22
+ mkdir build && \
23
+ cd build && \
24
+ cmake .. -DCMAKE_BUILD_TYPE=Release && \
25
+ cmake --build . --config Release -j$(nproc) && \
26
+ cp bin/main /usr/local/bin/llama-main && \
27
+ chmod 755 /usr/local/bin/llama-main && \
28
+ cd / && \
29
+ rm -rf /tmp/llama.cpp && \
30
+ echo "✅ llama.cpp compiled and installed to /usr/local/bin/llama-main"
31
+
32
  # Create a user to avoid running as root
33
  RUN useradd -m -u 1000 user
34
  USER user
llm_clients/qwen_translator.py CHANGED
@@ -50,15 +50,16 @@ class QwenTranslatorClient(LlmClient):
50
 
51
  @classmethod
52
  def _download_binary(cls) -> str:
53
- """Download and extract the pre-built llama.cpp binary from GitHub releases."""
54
- # Check OS - the Ubuntu binary only works on Linux
55
  if sys.platform == "win32":
56
  raise RuntimeError(
57
  "Translation with llama.cpp binary is not supported on Windows. "
58
- "The pre-built binary is for Linux only. "
59
  "Please use this feature on Linux or Hugging Face Spaces."
60
  )
61
 
 
62
  if cls._binary_path and os.path.exists(cls._binary_path):
63
  # Verify it's still executable
64
  if os.access(cls._binary_path, os.X_OK):
@@ -71,10 +72,24 @@ class QwenTranslatorClient(LlmClient):
71
  return cls._binary_path
72
  except Exception:
73
  pass
74
- # If we can't fix it, re-download
75
  cls._binary_path = None
76
 
77
- print("📥 Downloading pre-built llama.cpp binary...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  # Create a temporary directory for the binary
80
  if cls._binary_dir is None:
@@ -97,38 +112,8 @@ class QwenTranslatorClient(LlmClient):
97
  # If not found, we'll search after extraction
98
  binary_path = binary_dir / "main" # Default to 'main' (standard llama.cpp binary name)
99
 
100
- # Detect architecture and select appropriate binary
101
- machine = platform.machine().lower()
102
- is_linux = sys.platform.startswith('linux')
103
-
104
- # Map architectures to available binaries
105
- # Available: ubuntu-x64, ubuntu-s390x, ubuntu-vulkan-x64, macos-arm64, macos-x64, win-cpu-x64, win-cpu-arm64
106
- binary_name = "unknown"
107
- if is_linux:
108
- if machine in ['x86_64', 'amd64']:
109
- # Try standard Ubuntu x64 first (most common)
110
- zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6996/llama-b6996-bin-ubuntu-x64.zip"
111
- binary_name = "ubuntu-x64"
112
- elif machine == 's390x':
113
- zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6996/llama-b6996-bin-ubuntu-s390x.zip"
114
- binary_name = "ubuntu-s390x"
115
- elif machine in ['aarch64', 'arm64']:
116
- # No ARM Linux binary available, will need to handle this
117
- raise RuntimeError(
118
- f"ARM64 Linux architecture detected, but no pre-built ARM64 Linux binary is available. "
119
- f"Available binaries are for x86_64, s390x, macOS ARM64, and Windows. "
120
- f"Translation feature cannot work on ARM64 Linux without compiling from source."
121
- )
122
- else:
123
- # Try x64 as fallback
124
- print(f" ⚠️ Unknown Linux architecture '{machine}', trying x64 binary...", flush=True)
125
- zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6996/llama-b6996-bin-ubuntu-x64.zip"
126
- binary_name = "ubuntu-x64"
127
- else:
128
- # Non-Linux systems (shouldn't reach here due to earlier check, but just in case)
129
- raise RuntimeError(f"Unsupported platform: {sys.platform}")
130
-
131
- print(f" Selected binary: {binary_name} for architecture: {machine}", flush=True)
132
  zip_path = binary_dir / "llama-binary.zip"
133
 
134
  try:
@@ -220,58 +205,63 @@ class QwenTranslatorClient(LlmClient):
220
  print(f" ⚠️ Could not set permissions: {e}", flush=True)
221
 
222
  # Test if binary can actually run (check architecture compatibility)
223
- print(f" 🔍 Testing binary compatibility...", flush=True)
224
- detected_machine = platform.machine()
225
- print(f" System architecture: {detected_machine}", flush=True)
226
 
227
- try:
228
- # Try to run the binary with --help to verify it works
229
- test_result = subprocess.run(
230
- [cls._binary_path, "--help"],
231
- capture_output=True,
232
- text=True,
233
- timeout=5
234
- )
235
- if test_result.returncode == 0 or "usage" in test_result.stdout.lower() or "options" in test_result.stdout.lower():
236
- print(f" ✅ Binary is compatible and executable", flush=True)
237
- else:
238
- print(f" ⚠️ Binary test returned code {test_result.returncode}", flush=True)
239
- if test_result.stderr:
240
- print(f" Stderr: {test_result.stderr[:200]}", flush=True)
241
- except subprocess.TimeoutExpired:
242
- print(f" ⚠️ Binary test timed out", flush=True)
243
- except OSError as os_error:
244
- error_msg = str(os_error)
245
- errno = getattr(os_error, 'errno', None)
246
- if errno == 8 or "Exec format error" in error_msg or "cannot execute" in error_msg.lower():
247
- # Check what the binary actually is using 'file' command if available
248
- file_info = "unknown"
249
- try:
250
- file_result = subprocess.run(
251
- ["file", cls._binary_path],
252
- capture_output=True,
253
- text=True,
254
- timeout=2
255
- )
256
- if file_result.returncode == 0:
257
- file_info = file_result.stdout.strip()
258
- except (subprocess.TimeoutExpired, FileNotFoundError, Exception):
259
- pass
260
-
261
- raise RuntimeError(
262
- f"Binary architecture mismatch. The downloaded binary is not compatible with this system.\n"
263
- f"System architecture: {detected_machine}\n"
264
- f"Binary selected: {binary_name}\n"
265
- f"Binary info: {file_info}\n"
266
- f"Error: {error_msg}\n"
267
- f"The binary may require specific shared libraries or a different architecture. "
268
- f"Translation feature requires a compatible llama.cpp binary for this architecture."
269
- ) from os_error
270
- else:
271
- raise
272
- except Exception as test_error:
273
- error_msg = str(test_error)
274
- print(f" ⚠️ Binary test warning: {test_error}", flush=True)
 
 
 
 
 
 
275
 
276
  # Clean up zip file
277
  try:
 
50
 
51
  @classmethod
52
  def _download_binary(cls) -> str:
53
+ """Get the llama.cpp binary - prefer compiled version, fallback to downloaded."""
54
+ # Check OS - the binary only works on Linux
55
  if sys.platform == "win32":
56
  raise RuntimeError(
57
  "Translation with llama.cpp binary is not supported on Windows. "
58
+ "The binary is for Linux only. "
59
  "Please use this feature on Linux or Hugging Face Spaces."
60
  )
61
 
62
+ # First, check if we have a cached binary path
63
  if cls._binary_path and os.path.exists(cls._binary_path):
64
  # Verify it's still executable
65
  if os.access(cls._binary_path, os.X_OK):
 
72
  return cls._binary_path
73
  except Exception:
74
  pass
75
+ # If we can't fix it, reset
76
  cls._binary_path = None
77
 
78
+ # Check for compiled binary (from Dockerfile) first
79
+ compiled_binary_paths = [
80
+ "/usr/local/bin/llama-main", # Standard location from Dockerfile
81
+ "/app/llama.cpp/build/bin/main", # Alternative location
82
+ "/usr/bin/llama-main", # Another possible location
83
+ ]
84
+
85
+ for compiled_path in compiled_binary_paths:
86
+ if os.path.exists(compiled_path) and os.access(compiled_path, os.X_OK):
87
+ cls._binary_path = compiled_path
88
+ print(f"✅ Using compiled llama.cpp binary at: {compiled_path}", flush=True)
89
+ return cls._binary_path
90
+
91
+ # If no compiled binary found, download pre-built binary as fallback
92
+ print("📥 No compiled binary found, downloading pre-built llama.cpp binary...", flush=True)
93
 
94
  # Create a temporary directory for the binary
95
  if cls._binary_dir is None:
 
112
  # If not found, we'll search after extraction
113
  binary_path = binary_dir / "main" # Default to 'main' (standard llama.cpp binary name)
114
 
115
+ # Download the zip file
116
+ zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6995/llama-b6995-bin-ubuntu-x64.zip"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  zip_path = binary_dir / "llama-binary.zip"
118
 
119
  try:
 
205
  print(f" ⚠️ Could not set permissions: {e}", flush=True)
206
 
207
  # Test if binary can actually run (check architecture compatibility)
208
+ # Skip test for compiled binaries (they should work)
209
+ is_compiled = cls._binary_path.startswith("/usr/local/bin") or cls._binary_path.startswith("/app/llama.cpp")
 
210
 
211
+ if not is_compiled:
212
+ print(f" 🔍 Testing binary compatibility...", flush=True)
213
+ machine = platform.machine()
214
+ print(f" System architecture: {machine}", flush=True)
215
+
216
+ try:
217
+ # Try to run the binary with --help to verify it works
218
+ test_result = subprocess.run(
219
+ [cls._binary_path, "--help"],
220
+ capture_output=True,
221
+ text=True,
222
+ timeout=5
223
+ )
224
+ if test_result.returncode == 0 or "usage" in test_result.stdout.lower() or "options" in test_result.stdout.lower():
225
+ print(f" ✅ Binary is compatible and executable", flush=True)
226
+ else:
227
+ print(f" ⚠️ Binary test returned code {test_result.returncode}", flush=True)
228
+ if test_result.stderr:
229
+ print(f" Stderr: {test_result.stderr[:200]}", flush=True)
230
+ except subprocess.TimeoutExpired:
231
+ print(f" ⚠️ Binary test timed out", flush=True)
232
+ except OSError as os_error:
233
+ error_msg = str(os_error)
234
+ errno = getattr(os_error, 'errno', None)
235
+ if errno == 8 or "Exec format error" in error_msg or "cannot execute" in error_msg.lower():
236
+ # Check what the binary actually is using 'file' command if available
237
+ file_info = "unknown"
238
+ try:
239
+ file_result = subprocess.run(
240
+ ["file", cls._binary_path],
241
+ capture_output=True,
242
+ text=True,
243
+ timeout=2
244
+ )
245
+ if file_result.returncode == 0:
246
+ file_info = file_result.stdout.strip()
247
+ except (subprocess.TimeoutExpired, FileNotFoundError, Exception):
248
+ pass
249
+
250
+ raise RuntimeError(
251
+ f"Binary architecture mismatch. The downloaded binary is not compatible with this system.\n"
252
+ f"System architecture: {machine}\n"
253
+ f"Binary info: {file_info}\n"
254
+ f"Error: {error_msg}\n"
255
+ f"The Ubuntu x64 binary may not be compatible with this system. "
256
+ f"Translation feature requires a compatible llama.cpp binary for this architecture."
257
+ ) from os_error
258
+ else:
259
+ raise
260
+ except Exception as test_error:
261
+ error_msg = str(test_error)
262
+ print(f" ⚠️ Binary test warning: {test_error}", flush=True)
263
+ else:
264
+ print(f" ✅ Using compiled binary (no compatibility test needed)", flush=True)
265
 
266
  # Clean up zip file
267
  try: