Spaces:
Running
Running
| """Storage helpers for per-user notebook data. | |
| Spec references: | |
| - `specs/04_interfaces.md`: required storage module interface. | |
| - `specs/03_data_model.md`: JSON object storage and JSONL message layout. | |
| - `specs/07_security.md`: per-user directory isolation and path traversal prevention. | |
| - `specs/10_test_plan.md`: unit-testable storage safety behavior. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| from pathlib import Path | |
| from typing import Any | |
class StorageError(Exception):
    """Common ancestor of the storage-specific exceptions defined in this module."""
class StorageConfigurationError(StorageError):
    """Signal that the storage root has not been configured correctly."""
class StorageFormatError(StorageError):
    """Signal that stored or supplied data does not have the expected JSON shape."""
class StorageIOError(StorageError):
    """Signal that reading from or writing to the filesystem failed."""
def _data_root() -> Path:
    """Return the configured data root directory, creating it if necessary.

    Spec references:
    - `specs/07_security.md`: storage must enforce per-user directory isolation.
    - `specs/10_test_plan.md`: root selection must remain unit-testable.

    The location is read from the `NOTEBOOKLM_DATA_ROOT` environment variable on every
    call; surrounding whitespace is ignored and an unset or blank value falls back to
    `./data`. A leading `~` is expanded.

    Returns:
        The resolved, absolute Path of the data root.

    Raises:
        StorageConfigurationError: If the configured root cannot be created as a
            directory (for example, the path names an existing regular file).
    """
    configured: str = os.getenv("NOTEBOOKLM_DATA_ROOT", "").strip()
    root: Path = Path(configured or "./data").expanduser()
    try:
        root.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        # Translate the low-level failure so callers see the storage exception
        # hierarchy instead of a raw OSError.
        raise StorageConfigurationError(
            f"Data root is not a usable directory: {root}"
        ) from exc
    return root.resolve(strict=False)
def _validate_name(value: str, label: str) -> str:
    """Validate a user-supplied path segment before path construction.

    Spec references:
    - `specs/07_security.md`: prevent path traversal and preserve isolation.

    Args:
        value: Candidate directory name supplied by the caller.
        label: Field name used in the error messages.

    Returns:
        `value` unchanged when it is a safe single path segment.

    Raises:
        ValueError: If the segment is empty or whitespace-only, is `.` or `..`,
            contains a forward slash, a backslash or a NUL character, or is
            otherwise not a single relative path component.
    """
    if not value or not value.strip():
        raise ValueError(f"{label} must be a non-empty string.")

    # Path("..").name is "..", so the name comparison below does not catch the
    # parent-directory segment; reject the dot segments explicitly.
    is_dot_segment: bool = value in (".", "..")
    # Refuse both separator styles on every platform (a backslash is an ordinary
    # character to POSIX paths) and NUL, which the OS cannot store in a file name.
    has_forbidden_char: bool = any(ch in value for ch in ("/", "\\", "\0"))
    if is_dot_segment or has_forbidden_char:
        raise ValueError(f"{label} must be a single relative path segment.")

    candidate: Path = Path(value)
    if candidate.name != value or candidate.is_absolute():
        raise ValueError(f"{label} must be a single relative path segment.")
    return value
def user_root(username: str) -> Path:
    """Locate (and create on demand) the storage directory owned by one user.

    Spec references:
    - `specs/04_interfaces.md`: implements `user_root()`.
    - `specs/07_security.md`: enforces per-user directory isolation.

    The directory is `<data root>/users/<username>`.

    Raises:
        ValueError: If `username` fails segment validation or the joined path
            leaves the data root.
        StorageConfigurationError: Propagated if resolving the data root reports
            a configuration problem.
        StorageIOError: If the directory cannot be created.
    """
    # Validate before touching the filesystem so bad input never creates anything.
    checked_name: str = _validate_name(username, "username")
    base_dir: Path = _data_root()
    user_dir: Path = safe_join(base_dir, "users", checked_name)
    try:
        user_dir.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        raise StorageIOError(f"Failed to create user root directory: {user_dir}") from exc
    else:
        return user_dir
def notebook_root(username: str, notebook_id: str) -> Path:
    """Locate (and create on demand) the directory of one notebook of one user.

    Spec references:
    - `specs/04_interfaces.md`: implements `notebook_root()`.
    - `specs/07_security.md`: preserves per-user notebook isolation.

    The directory is `<user root>/notebooks/<notebook_id>`.

    Raises:
        ValueError: If `username` or `notebook_id` is unsafe.
        StorageConfigurationError: Propagated if resolving the data root reports
            a configuration problem.
        StorageIOError: If the directory cannot be created.
    """
    # The notebook id is checked first, so an invalid id fails before the
    # user directory is created as a side effect of user_root().
    checked_id: str = _validate_name(notebook_id, "notebook_id")
    owner_dir: Path = user_root(username)
    notebook_dir: Path = safe_join(owner_dir, "notebooks", checked_id)
    try:
        notebook_dir.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        raise StorageIOError(
            f"Failed to create notebook root directory: {notebook_dir}"
        ) from exc
    else:
        return notebook_dir
def safe_join(root: Path, *parts: str | os.PathLike[str]) -> Path:
    """Build a path below `root`, refusing anything that would escape it.

    Spec references:
    - `specs/04_interfaces.md`: implements `safe_join()`.
    - `specs/07_security.md`: resolved path must remain inside the root.
    - `specs/10_test_plan.md`: supports storage safety unit tests.

    `root` is created if it does not exist yet. The joined path itself is only
    resolved, never created.

    Args:
        root: The directory boundary that must contain the resolved result.
        *parts: Relative path segments to join beneath `root`, in order.

    Returns:
        The resolved path, which is `root` itself or a descendant of it.

    Raises:
        ValueError: If any part is absolute, or the resolved result lies outside `root`.
        StorageIOError: If the root directory cannot be prepared.
    """
    try:
        root.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        raise StorageIOError(f"Failed to prepare storage root: {root}") from exc

    boundary: Path = root.resolve(strict=False)

    # Check the segments one by one, in order, before doing any joining.
    segments: list[Path] = []
    for raw in parts:
        segment: Path = Path(raw)
        if segment.is_absolute():
            raise ValueError(f"Absolute paths are not allowed in safe_join: {segment}")
        segments.append(segment)

    # Resolving collapses ".." components and symlinks, so the containment check
    # below is made against the location that would really be used.
    target: Path = boundary.joinpath(*segments).resolve(strict=False)
    try:
        target.relative_to(boundary)
    except ValueError as exc:
        raise ValueError(
            f"Path traversal detected for root '{boundary}' and path '{target}'."
        ) from exc
    return target
def read_json(path: Path) -> dict[str, Any]:
    """Read a JSON object from disk.

    Spec references:
    - `specs/04_interfaces.md`: implements `read_json()`.
    - `specs/03_data_model.md`: persisted JSON files use object-shaped payloads.

    Args:
        path: Location of a UTF-8 encoded JSON file.

    Returns:
        The decoded top-level JSON object.

    Raises:
        StorageIOError: If the file is missing, cannot be read, is not valid UTF-8,
            or does not contain valid JSON.
        StorageFormatError: If the decoded JSON is not a top-level object.
    """
    try:
        with path.open("r", encoding="utf-8") as handle:
            payload: Any = json.load(handle)
    except FileNotFoundError as exc:
        raise StorageIOError(f"JSON file does not exist: {path}") from exc
    except json.JSONDecodeError as exc:
        raise StorageIOError(f"Invalid JSON in file: {path}") from exc
    except UnicodeDecodeError as exc:
        # Decoding errors are ValueError subclasses, not OSError, so without this
        # clause a file with invalid bytes would escape the storage hierarchy.
        raise StorageIOError(f"JSON file is not valid UTF-8: {path}") from exc
    except OSError as exc:
        raise StorageIOError(f"Failed to read JSON file: {path}") from exc
    if not isinstance(payload, dict):
        raise StorageFormatError(f"Expected a JSON object in file: {path}")
    return payload
def write_json(path: Path, obj: dict[str, Any]) -> None:
    """Write a JSON object to disk.

    Spec references:
    - `specs/04_interfaces.md`: implements `write_json()`.
    - `specs/03_data_model.md`: persisted metadata files are JSON objects.

    The object is written as ASCII-escaped, key-sorted JSON with a two-space indent
    and a trailing newline. Missing parent directories are created.

    Args:
        path: Destination file; an existing file is replaced.
        obj: Dictionary to serialize.

    Raises:
        StorageFormatError: If `obj` is not a dictionary.
        TypeError: If `obj` contains values the `json` module cannot serialize;
            in that case the destination file is left untouched.
        StorageIOError: If the file cannot be written.
    """
    if not isinstance(obj, dict):
        raise StorageFormatError("write_json expects a dictionary object.")

    # Serialize before opening: opening with "w" truncates the file, so encoding
    # straight into the handle would destroy the previous contents whenever the
    # object turns out not to be serializable.
    text: str = json.dumps(obj, ensure_ascii=True, indent=2, sort_keys=True) + "\n"
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("w", encoding="utf-8", newline="\n") as handle:
            handle.write(text)
    except OSError as exc:
        raise StorageIOError(f"Failed to write JSON file: {path}") from exc
def append_jsonl(path: Path, obj: dict[str, Any]) -> None:
    """Append one JSON object as one line to a JSONL file.

    Spec references:
    - `specs/04_interfaces.md`: implements `append_jsonl()`.
    - `specs/03_data_model.md`: `messages.jsonl` stores one JSON object per line.

    The record is written as compact, ASCII-escaped, key-sorted JSON followed by a
    newline. Missing parent directories and the file itself are created on demand.

    Args:
        path: JSONL file to extend.
        obj: Dictionary to append as a single line.

    Raises:
        StorageFormatError: If `obj` is not a dictionary.
        TypeError: If `obj` contains values the `json` module cannot serialize;
            in that case nothing is created or written.
        StorageIOError: If the file cannot be appended.
    """
    if not isinstance(obj, dict):
        raise StorageFormatError("append_jsonl expects a dictionary object.")

    # Build the complete line up front so a serialization failure leaves no empty
    # file or stray directory behind, and the record goes out in one write call.
    line: str = json.dumps(obj, ensure_ascii=True, sort_keys=True) + "\n"
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8", newline="\n") as handle:
            handle.write(line)
    except OSError as exc:
        raise StorageIOError(f"Failed to append JSONL file: {path}") from exc