zazaman committed on
Commit
b880cfc
·
1 Parent(s): d2cecb3

Compile llama.cpp in Dockerfile for architecture compatibility

Browse files
Files changed (2) hide show
  1. Dockerfile +19 -2
  2. llm_clients/qwen_translator.py +78 -88
Dockerfile CHANGED
@@ -3,15 +3,32 @@ FROM python:3.10-slim
3
  # Set working directory
4
  WORKDIR /app
5
 
6
- # Install system dependencies for PDF processing and other requirements
7
- # Note: llama.cpp binary is downloaded at runtime, no compilation needed
8
  RUN apt-get update && apt-get install -y \
9
  gcc \
10
  g++ \
11
  git \
12
  unzip \
 
 
 
13
  && rm -rf /var/lib/apt/lists/*
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # Create a user to avoid running as root
16
  RUN useradd -m -u 1000 user
17
  USER user
 
3
  # Set working directory
4
  WORKDIR /app
5
 
6
+ # Install system dependencies for PDF processing, llama.cpp compilation, and other requirements
 
7
  RUN apt-get update && apt-get install -y \
8
  gcc \
9
  g++ \
10
  git \
11
  unzip \
12
+ cmake \
13
+ make \
14
+ build-essential \
15
  && rm -rf /var/lib/apt/lists/*
16
 
17
+ # Compile llama.cpp from source (for translation feature)
18
+ # This ensures compatibility with the container's architecture
19
+ RUN cd /tmp && \
20
+ git clone --depth 1 --branch master https://github.com/ggerganov/llama.cpp.git && \
21
+ cd llama.cpp && \
22
+ mkdir build && \
23
+ cd build && \
24
+ cmake .. -DCMAKE_BUILD_TYPE=Release && \
25
+ cmake --build . --config Release -j$(nproc) && \
26
+ cp bin/main /usr/local/bin/llama-main && \
27
+ chmod 755 /usr/local/bin/llama-main && \
28
+ cd / && \
29
+ rm -rf /tmp/llama.cpp && \
30
+ echo "✅ llama.cpp compiled and installed to /usr/local/bin/llama-main"
31
+
32
  # Create a user to avoid running as root
33
  RUN useradd -m -u 1000 user
34
  USER user
llm_clients/qwen_translator.py CHANGED
@@ -50,15 +50,16 @@ class QwenTranslatorClient(LlmClient):
50
 
51
  @classmethod
52
  def _download_binary(cls) -> str:
53
- """Download and extract the pre-built llama.cpp binary from GitHub releases."""
54
- # Check OS - the Ubuntu binary only works on Linux
55
  if sys.platform == "win32":
56
  raise RuntimeError(
57
  "Translation with llama.cpp binary is not supported on Windows. "
58
- "The pre-built binary is for Linux only. "
59
  "Please use this feature on Linux or Hugging Face Spaces."
60
  )
61
 
 
62
  if cls._binary_path and os.path.exists(cls._binary_path):
63
  # Verify it's still executable
64
  if os.access(cls._binary_path, os.X_OK):
@@ -71,10 +72,24 @@ class QwenTranslatorClient(LlmClient):
71
  return cls._binary_path
72
  except Exception:
73
  pass
74
- # If we can't fix it, re-download
75
  cls._binary_path = None
76
 
77
- print("📥 Downloading pre-built llama.cpp binary...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  # Create a temporary directory for the binary
80
  if cls._binary_dir is None:
@@ -97,38 +112,8 @@ class QwenTranslatorClient(LlmClient):
97
  # If not found, we'll search after extraction
98
  binary_path = binary_dir / "main" # Default to 'main' (standard llama.cpp binary name)
99
 
100
- # Detect architecture and select appropriate binary
101
- machine = platform.machine().lower()
102
- is_linux = sys.platform.startswith('linux')
103
-
104
- # Map architectures to available binaries
105
- # Available: ubuntu-x64, ubuntu-s390x, ubuntu-vulkan-x64, macos-arm64, macos-x64, win-cpu-x64, win-cpu-arm64
106
- binary_name = "unknown"
107
- if is_linux:
108
- if machine in ['x86_64', 'amd64']:
109
- # Try standard Ubuntu x64 first (most common)
110
- zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6996/llama-b6996-bin-ubuntu-x64.zip"
111
- binary_name = "ubuntu-x64"
112
- elif machine == 's390x':
113
- zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6996/llama-b6996-bin-ubuntu-s390x.zip"
114
- binary_name = "ubuntu-s390x"
115
- elif machine in ['aarch64', 'arm64']:
116
- # No ARM Linux binary available, will need to handle this
117
- raise RuntimeError(
118
- f"ARM64 Linux architecture detected, but no pre-built ARM64 Linux binary is available. "
119
- f"Available binaries are for x86_64, s390x, macOS ARM64, and Windows. "
120
- f"Translation feature cannot work on ARM64 Linux without compiling from source."
121
- )
122
- else:
123
- # Try x64 as fallback
124
- print(f" ⚠️ Unknown Linux architecture '{machine}', trying x64 binary...", flush=True)
125
- zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6996/llama-b6996-bin-ubuntu-x64.zip"
126
- binary_name = "ubuntu-x64"
127
- else:
128
- # Non-Linux systems (shouldn't reach here due to earlier check, but just in case)
129
- raise RuntimeError(f"Unsupported platform: {sys.platform}")
130
-
131
- print(f" Selected binary: {binary_name} for architecture: {machine}", flush=True)
132
  zip_path = binary_dir / "llama-binary.zip"
133
 
134
  try:
@@ -220,58 +205,63 @@ class QwenTranslatorClient(LlmClient):
220
  print(f" ⚠️ Could not set permissions: {e}", flush=True)
221
 
222
  # Test if binary can actually run (check architecture compatibility)
223
- print(f" 🔍 Testing binary compatibility...", flush=True)
224
- detected_machine = platform.machine()
225
- print(f" System architecture: {detected_machine}", flush=True)
226
 
227
- try:
228
- # Try to run the binary with --help to verify it works
229
- test_result = subprocess.run(
230
- [cls._binary_path, "--help"],
231
- capture_output=True,
232
- text=True,
233
- timeout=5
234
- )
235
- if test_result.returncode == 0 or "usage" in test_result.stdout.lower() or "options" in test_result.stdout.lower():
236
- print(f" ✅ Binary is compatible and executable", flush=True)
237
- else:
238
- print(f" ⚠️ Binary test returned code {test_result.returncode}", flush=True)
239
- if test_result.stderr:
240
- print(f" Stderr: {test_result.stderr[:200]}", flush=True)
241
- except subprocess.TimeoutExpired:
242
- print(f" ⚠️ Binary test timed out", flush=True)
243
- except OSError as os_error:
244
- error_msg = str(os_error)
245
- errno = getattr(os_error, 'errno', None)
246
- if errno == 8 or "Exec format error" in error_msg or "cannot execute" in error_msg.lower():
247
- # Check what the binary actually is using 'file' command if available
248
- file_info = "unknown"
249
- try:
250
- file_result = subprocess.run(
251
- ["file", cls._binary_path],
252
- capture_output=True,
253
- text=True,
254
- timeout=2
255
- )
256
- if file_result.returncode == 0:
257
- file_info = file_result.stdout.strip()
258
- except (subprocess.TimeoutExpired, FileNotFoundError, Exception):
259
- pass
260
-
261
- raise RuntimeError(
262
- f"Binary architecture mismatch. The downloaded binary is not compatible with this system.\n"
263
- f"System architecture: {detected_machine}\n"
264
- f"Binary selected: {binary_name}\n"
265
- f"Binary info: {file_info}\n"
266
- f"Error: {error_msg}\n"
267
- f"The binary may require specific shared libraries or a different architecture. "
268
- f"Translation feature requires a compatible llama.cpp binary for this architecture."
269
- ) from os_error
270
- else:
271
- raise
272
- except Exception as test_error:
273
- error_msg = str(test_error)
274
- print(f" ⚠️ Binary test warning: {test_error}", flush=True)
 
 
 
 
 
 
275
 
276
  # Clean up zip file
277
  try:
 
50
 
51
  @classmethod
52
  def _download_binary(cls) -> str:
53
+ """Get the llama.cpp binary - prefer compiled version, fallback to downloaded."""
54
+ # Check OS - the binary only works on Linux
55
  if sys.platform == "win32":
56
  raise RuntimeError(
57
  "Translation with llama.cpp binary is not supported on Windows. "
58
+ "The binary is for Linux only. "
59
  "Please use this feature on Linux or Hugging Face Spaces."
60
  )
61
 
62
+ # First, check if we have a cached binary path
63
  if cls._binary_path and os.path.exists(cls._binary_path):
64
  # Verify it's still executable
65
  if os.access(cls._binary_path, os.X_OK):
 
72
  return cls._binary_path
73
  except Exception:
74
  pass
75
+ # If we can't fix it, reset
76
  cls._binary_path = None
77
 
78
+ # Check for compiled binary (from Dockerfile) first
79
+ compiled_binary_paths = [
80
+ "/usr/local/bin/llama-main", # Standard location from Dockerfile
81
+ "/app/llama.cpp/build/bin/main", # Alternative location
82
+ "/usr/bin/llama-main", # Another possible location
83
+ ]
84
+
85
+ for compiled_path in compiled_binary_paths:
86
+ if os.path.exists(compiled_path) and os.access(compiled_path, os.X_OK):
87
+ cls._binary_path = compiled_path
88
+ print(f"✅ Using compiled llama.cpp binary at: {compiled_path}", flush=True)
89
+ return cls._binary_path
90
+
91
+ # If no compiled binary found, download pre-built binary as fallback
92
+ print("📥 No compiled binary found, downloading pre-built llama.cpp binary...", flush=True)
93
 
94
  # Create a temporary directory for the binary
95
  if cls._binary_dir is None:
 
112
  # If not found, we'll search after extraction
113
  binary_path = binary_dir / "main" # Default to 'main' (standard llama.cpp binary name)
114
 
115
+ # Download the zip file
116
+ zip_url = "https://github.com/ggml-org/llama.cpp/releases/download/b6995/llama-b6995-bin-ubuntu-x64.zip"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  zip_path = binary_dir / "llama-binary.zip"
118
 
119
  try:
 
205
  print(f" ⚠️ Could not set permissions: {e}", flush=True)
206
 
207
  # Test if binary can actually run (check architecture compatibility)
208
+ # Skip test for compiled binaries (they should work)
209
+ is_compiled = cls._binary_path.startswith("/usr/local/bin") or cls._binary_path.startswith("/app/llama.cpp")
 
210
 
211
+ if not is_compiled:
212
+ print(f" 🔍 Testing binary compatibility...", flush=True)
213
+ machine = platform.machine()
214
+ print(f" System architecture: {machine}", flush=True)
215
+
216
+ try:
217
+ # Try to run the binary with --help to verify it works
218
+ test_result = subprocess.run(
219
+ [cls._binary_path, "--help"],
220
+ capture_output=True,
221
+ text=True,
222
+ timeout=5
223
+ )
224
+ if test_result.returncode == 0 or "usage" in test_result.stdout.lower() or "options" in test_result.stdout.lower():
225
+ print(f" ✅ Binary is compatible and executable", flush=True)
226
+ else:
227
+ print(f" ⚠️ Binary test returned code {test_result.returncode}", flush=True)
228
+ if test_result.stderr:
229
+ print(f" Stderr: {test_result.stderr[:200]}", flush=True)
230
+ except subprocess.TimeoutExpired:
231
+ print(f" ⚠️ Binary test timed out", flush=True)
232
+ except OSError as os_error:
233
+ error_msg = str(os_error)
234
+ errno = getattr(os_error, 'errno', None)
235
+ if errno == 8 or "Exec format error" in error_msg or "cannot execute" in error_msg.lower():
236
+ # Check what the binary actually is using 'file' command if available
237
+ file_info = "unknown"
238
+ try:
239
+ file_result = subprocess.run(
240
+ ["file", cls._binary_path],
241
+ capture_output=True,
242
+ text=True,
243
+ timeout=2
244
+ )
245
+ if file_result.returncode == 0:
246
+ file_info = file_result.stdout.strip()
247
+ except (subprocess.TimeoutExpired, FileNotFoundError, Exception):
248
+ pass
249
+
250
+ raise RuntimeError(
251
+ f"Binary architecture mismatch. The downloaded binary is not compatible with this system.\n"
252
+ f"System architecture: {machine}\n"
253
+ f"Binary info: {file_info}\n"
254
+ f"Error: {error_msg}\n"
255
+ f"The Ubuntu x64 binary may not be compatible with this system. "
256
+ f"Translation feature requires a compatible llama.cpp binary for this architecture."
257
+ ) from os_error
258
+ else:
259
+ raise
260
+ except Exception as test_error:
261
+ error_msg = str(test_error)
262
+ print(f" ⚠️ Binary test warning: {test_error}", flush=True)
263
+ else:
264
+ print(f" ✅ Using compiled binary (no compatibility test needed)", flush=True)
265
 
266
  # Clean up zip file
267
  try: