"""
src/storage/paths.py — Path normalisation and ID utilities (A2.3).
Copied from src/functions.py — do NOT delete the originals (backward compat).
"""
import os
import time
import uuid
import hashlib
# Maximum allowed length, in characters, for folder paths and agent IDs
# (REQ-066). Inputs longer than this are truncated to this length by
# normalize_folder_path and validate_agent_id rather than rejected.
_MAX_PATH_LEN = 512
def generate_id(prefix: str) -> str:
    """Build a unique, time-sortable identifier carrying a readable prefix.

    The result has the shape ``{prefix}_{timestamp}_{random}`` where
    *timestamp* is the current Unix time in milliseconds written in
    lowercase base 36, and *random* is the first 12 hex digits of a fresh
    UUID4.

    Args:
        prefix: Human-readable tag placed at the front of the ID.

    Returns:
        The assembled identifier string.
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    millis = int(time.time() * 1000)

    # Peel off base-36 digits least-significant first, then reverse them.
    digits = []
    while millis > 0:
        millis, remainder = divmod(millis, 36)
        digits.append(alphabet[remainder])

    # A non-positive timestamp produces no digits; fall back to "0".
    encoded = "".join(reversed(digits)) or "0"

    suffix = uuid.uuid4().hex[:12]
    return f"{prefix}_{encoded}_{suffix}"
def fingerprint_id(prefix: str, content: str) -> str:
    """Derive a deterministic ID from *content* via a SHA-256 fingerprint.

    The content is trimmed of surrounding whitespace and lower-cased before
    hashing, so inputs that differ only in letter case or outer whitespace
    map to the same ID.

    Args:
        prefix: Human-readable tag placed at the front of the ID.
        content: Text to fingerprint.

    Returns:
        ``{prefix}_{digest}`` where *digest* is the first 16 hex characters
        of the SHA-256 hash of the canonicalised content encoded as UTF-8.
    """
    canonical = content.strip().lower()
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    short_digest = digest[:16]
    return f"{prefix}_{short_digest}"
def normalize_folder_path(path: str) -> str:
    """Normalize a folder path for safe use in KV scope keys.

    Steps applied in order:

    1. Cap the raw input at ``_MAX_PATH_LEN`` characters (REQ-066).
    2. Convert backslashes to forward slashes so both separator styles are
       handled the same way on every platform.
    3. Reject the path if any segment is exactly ``..`` (REQ-064). This is
       done *before* ``os.path.normpath`` runs, because ``normpath`` would
       otherwise silently resolve ``a/../b`` to ``b``.
    4. Apply ``os.path.normpath`` to collapse duplicate separators and drop
       ``.`` segments.
    5. Convert any OS-native separators produced by step 4 back to forward
       slashes.
    6. Strip leading and trailing slashes.

    Args:
        path: Raw folder path; may use ``/`` or ``\\`` as separators.

    Raises:
        ValueError: if *path* is empty (before or after normalization), or
            if it contains a ``..`` segment, which would indicate an
            attempt at path traversal (REQ-064).

    Returns:
        A non-empty, forward-slash-separated string with no leading/trailing
        slashes and no ``..`` segments.

    Property (REQ-074): idempotent — applying this function twice yields
    the same result as applying it once. Unifying the separators *before*
    ``normpath`` is what guarantees this on POSIX, where ``normpath`` does
    not treat a backslash as a separator.

    NOTE(review): ``os.path.normpath`` maps inputs such as ``./`` to ``.``,
    which is returned as-is rather than rejected as empty — confirm that
    this is intended.
    """
    if not path:
        raise ValueError("folder_path must not be empty")

    # 1. Length cap before any processing.
    path = path[:_MAX_PATH_LEN]

    # 2. Unify separators up front so the traversal check and normpath both
    #    see the same segment boundaries regardless of platform.
    unified = path.replace("\\", "/")

    # 3. Traversal check on the raw segments, before normpath can resolve
    #    (and thereby hide) a ".." component.
    if ".." in unified.split("/"):
        raise ValueError(
            f"folder_path contains path traversal segment '..': {path!r}"
        )

    # 4-6. OS-level normalisation, back to forward slashes (normpath emits
    #      backslashes on Windows), then trim the outer slashes.
    normalized = os.path.normpath(unified).replace("\\", "/").strip("/")

    # Defence in depth: reject any ".." that somehow survives normalisation.
    if ".." in normalized.split("/"):
        raise ValueError(
            f"folder_path contains path traversal segment '..': {path!r}"
        )

    if not normalized:
        raise ValueError("folder_path is empty after normalization")

    return normalized
def validate_agent_id(agent_id: str) -> str:
    """Validate and sanitize an agent_id before use in KV scope keys.

    Strips surrounding whitespace and caps the result at ``_MAX_PATH_LEN``
    characters (REQ-066). Trailing whitespace is stripped again after the
    cap, because truncating an over-long ID can cut in the middle of
    interior whitespace and leave it dangling at the end. This keeps the
    result free of surrounding whitespace and makes the function
    idempotent.

    Args:
        agent_id: Raw agent identifier.

    Raises:
        ValueError: if *agent_id* is empty, or empty after stripping.

    Returns:
        Sanitized agent_id string.
    """
    if not agent_id:
        raise ValueError("agent_id must not be empty")

    # strip -> cap -> rstrip: the final rstrip removes whitespace that the
    # cap may have exposed at the end of the string.
    sanitized = agent_id.strip()[:_MAX_PATH_LEN].rstrip()

    if not sanitized:
        raise ValueError("agent_id is empty after stripping whitespace")

    return sanitized
|