File size: 3,789 Bytes
4d5727a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
src/storage/paths.py — Path normalisation and ID utilities (A2.3).

Copied from src/functions.py — do NOT delete the originals (backward compat).
"""

import os
import time
import uuid
import hashlib

# Maximum allowed length for folder paths and agent IDs.
_MAX_PATH_LEN = 512


def generate_id(prefix: str) -> str:
    """Build a unique, roughly time-ordered ID that starts with *prefix*.

    The result has the shape ``{prefix}_{base36_timestamp}_{12_hex_chars}``:

    * ``base36_timestamp`` is the current wall-clock time in milliseconds,
      written in lowercase base 36 with no padding.
    * ``12_hex_chars`` are the first 12 hex digits of a fresh ``uuid4``.

    Args:
        prefix: Human-readable label placed in front of the ID.

    Returns:
        The assembled ID string.
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    remaining = int(time.time() * 1000)

    # Peel off base-36 digits least-significant first, then reverse.
    digits = []
    while remaining > 0:
        remaining, index = divmod(remaining, 36)
        digits.append(alphabet[index])

    # A non-positive timestamp produces no digits; fall back to a single "0".
    encoded = "".join(reversed(digits)) or "0"

    suffix = uuid.uuid4().hex[:12]
    return f"{prefix}_{encoded}_{suffix}"


def fingerprint_id(prefix: str, content: str) -> str:
    """Derive a deterministic ID from *content* via SHA-256.

    The content is canonicalised first (surrounding whitespace removed,
    then lowercased), so inputs that differ only in case or outer
    whitespace map to the same ID.

    Format: ``{prefix}_{first_16_hex_chars_of_sha256}``

    Args:
        prefix: Human-readable label placed in front of the fingerprint.
        content: Text to fingerprint.

    Returns:
        The prefix, an underscore, and the first 16 hex digits of the
        SHA-256 digest of the canonicalised, UTF-8 encoded content.
    """
    canonical = content.strip().lower()

    hasher = hashlib.sha256()
    hasher.update(canonical.encode("utf-8"))
    short_digest = hasher.hexdigest()[:16]

    return f"{prefix}_{short_digest}"


def normalize_folder_path(path: str) -> str:
    """Normalize a folder path for safe use in KV scope keys.

    Steps applied in order:
    1. Cap the raw input at ``_MAX_PATH_LEN`` (512) characters (REQ-066).
    2. Convert backslashes to forward slashes so both separator styles are
       treated as separators on every platform.
    3. Reject the path if any segment is ``..`` (REQ-064).
    4. Apply ``os.path.normpath`` to collapse redundant separators and
       drop ``.`` segments.
    5. Convert separators to forward slashes again (``normpath`` emits
       backslashes on Windows).
    6. Strip any remaining leading or trailing slashes.

    Args:
        path: Raw folder path; ``/`` and ``\\`` are both accepted as
            separators.

    Raises:
        ValueError: if *path* is empty (before or after normalization), or
                    if it contains a ``..`` segment, which would indicate
                    an attempt at path traversal (REQ-064). The check runs
                    on the length-capped input, so truncation that leaves
                    a ``..`` segment is rejected as well.

    Returns:
        A non-empty, forward-slash-separated string with no leading/trailing
        slashes, no empty segments and no ``..`` segments — safe for use as
        a KV scope fragment. A path that reduces to the current directory
        is returned as ``"."``.

    Property (REQ-074): idempotent — applying this function twice yields
    the same result as applying it once.
    """
    if not path:
        raise ValueError("folder_path must not be empty")

    # 1. Length cap before any processing.
    path = path[:_MAX_PATH_LEN]

    # 2. Unify separators *before* normpath. On POSIX, normpath does not
    #    treat "\" as a separator, so converting afterwards would let
    #    inputs such as "a\\\\b" or "a\\.\\b" come out as "a//b" / "a/./b"
    #    and break idempotence.
    unified = path.replace("\\", "/")

    # 3. Traversal check on the raw segments, before normpath has a chance
    #    to resolve (and thereby hide) a ".." component.
    if ".." in unified.split("/"):
        raise ValueError(
            f"folder_path contains path traversal segment '..': {path!r}"
        )

    # 4-6. OS-level normalisation, back to forward slashes (Windows emits
    #      backslashes), then trim leading / trailing slashes.
    normalized = os.path.normpath(unified).replace("\\", "/").strip("/")

    # Guard: also reject any ".." that somehow survives normalisation.
    if ".." in normalized.split("/"):
        raise ValueError(
            f"folder_path contains path traversal segment '..': {path!r}"
        )

    if not normalized:
        raise ValueError("folder_path is empty after normalization")

    return normalized


def validate_agent_id(agent_id: str) -> str:
    """Check and clean an agent_id so it can be used in KV scope keys.

    Surrounding whitespace is removed first, then the result is cut to at
    most ``_MAX_PATH_LEN`` (512) characters (REQ-066).

    NOTE: the cut happens after stripping, so an over-long ID whose cut
    point falls just after interior whitespace can still end in whitespace.

    Args:
        agent_id: Raw agent identifier.

    Raises:
        ValueError: if *agent_id* is empty, or empty once whitespace has
                    been stripped.

    Returns:
        The stripped, length-capped agent_id.
    """
    if agent_id:
        trimmed = agent_id.strip()
        capped = trimmed[:_MAX_PATH_LEN]
        if capped:
            return capped
        raise ValueError("agent_id is empty after stripping whitespace")

    raise ValueError("agent_id must not be empty")