kratadata committed on
Commit
0120bf8
·
verified ·
1 Parent(s): e6e1ee8

Upload folder via script

Browse files
environment.py CHANGED
@@ -10,8 +10,11 @@ VELAI_STORAGE_SECRET = os.getenv("VELAI_STORAGE_SECRET") or "super-secure"
10
  VELAI_ADMIN_PASSWORD = os.getenv("VELAI_ADMIN_PASSWORD") or "velai-admin"
11
  VELAI_APP_PASSWORD = os.getenv("VELAI_APP_PASSWORD") or ""
12
 
13
- VELAI_STORAGE_PATH = Path(os.getenv("VELAI_STORAGE_PATH") or ".storage")
14
- VELAI_BLOB_STORAGE_PATH = Path(os.getenv("VELAI_STORAGE_PATH") or VELAI_STORAGE_PATH / "blob")
 
 
 
15
 
16
  VELAI_ENABLE_DUMMY = bool(os.getenv("VELAI_ENABLE_DUMMY")) or False
17
  VELAI_LOG_LEVEL = (os.getenv("VELAI_LOG_LEVEL") or "INFO").upper()
 
10
  VELAI_ADMIN_PASSWORD = os.getenv("VELAI_ADMIN_PASSWORD") or "velai-admin"
11
  VELAI_APP_PASSWORD = os.getenv("VELAI_APP_PASSWORD") or ""
12
 
13
# Root directory for persisted state; defaults to a local ".storage" folder
# when VELAI_STORAGE_PATH is unset or empty.
_velai_storage_path = Path(os.getenv("VELAI_STORAGE_PATH") or ".storage")
VELAI_STORAGE_PATH = _velai_storage_path

# Default blob location:
#   * VELAI_STORAGE_PATH set (e.g. /data on Hugging Face): blobs live alongside
#     the NiceGUI JSON files, directly in the storage root.
#   * otherwise: a "blob" sub-folder of the default storage root.
if os.getenv("VELAI_STORAGE_PATH"):
    _default_blob_path = _velai_storage_path
else:
    _default_blob_path = _velai_storage_path / "blob"

# An explicit VELAI_BLOB_STORAGE_PATH always wins over the derived default.
VELAI_BLOB_STORAGE_PATH = Path(os.getenv("VELAI_BLOB_STORAGE_PATH") or _default_blob_path)
18
 
19
  VELAI_ENABLE_DUMMY = bool(os.getenv("VELAI_ENABLE_DUMMY")) or False
20
  VELAI_LOG_LEVEL = (os.getenv("VELAI_LOG_LEVEL") or "INFO").upper()
velai/app_context.py CHANGED
@@ -12,7 +12,11 @@ from velai.context_registry import ContextRegistry
12
  from velai.storage.async_blob_storage import AsyncBlobStorageAdapter
13
  from velai.storage.blob_storage import BlobStorage, FileSystemBlobStorage
14
  from velai.storage.document_storage import DocumentStorage
15
- from velai.storage.keyvalue_storage import NiceGuiKeyValueStorage, NiceGuiStorageType
 
 
 
 
16
  from velai.sync_utils import sync_wrap
17
  from velai.user_info import UserInfo
18
 
@@ -64,9 +68,24 @@ def _create_user_info(storage: DocumentStorage) -> UserInfo:
64
  return info
65
 
66
 
 
 
 
 
 
 
 
 
 
 
 
67
  def _build_app_context(session_id: str, client: Client | None) -> AppContext:
68
- app_storage = DocumentStorage(NiceGuiKeyValueStorage[dict[str, Any]](NiceGuiStorageType.General))
69
- user_storage = DocumentStorage(NiceGuiKeyValueStorage[dict[str, Any]](NiceGuiStorageType.User))
 
 
 
 
70
  blob_storage: BlobStorage = FileSystemBlobStorage(
71
  user_id=session_id, root_dir=environment.VELAI_BLOB_STORAGE_PATH, document_storage=user_storage
72
  )
 
12
  from velai.storage.async_blob_storage import AsyncBlobStorageAdapter
13
  from velai.storage.blob_storage import BlobStorage, FileSystemBlobStorage
14
  from velai.storage.document_storage import DocumentStorage
15
+ from velai.storage.keyvalue_storage import (
16
+ FilePersistentDictStorage,
17
+ NiceGuiKeyValueStorage,
18
+ NiceGuiStorageType,
19
+ )
20
  from velai.sync_utils import sync_wrap
21
  from velai.user_info import UserInfo
22
 
 
68
  return info
69
 
70
 
71
def _document_storage_for_session(session_id: str, storage_type: NiceGuiStorageType) -> DocumentStorage:
    """Build a DocumentStorage backed by a NiceGUI storage file on disk.

    Intended for plain HTTP handlers that have no NiceGUI client.

    Args:
        session_id: Session identifier; only used in the file name for
            non-general storage.
        storage_type: ``NiceGuiStorageType.General`` selects the shared
            ``storage-general.json`` file; any other value selects
            ``storage-user-<session_id>.json``.

    Returns:
        A DocumentStorage wrapping a FilePersistentDictStorage over the file
        under ``environment.VELAI_STORAGE_PATH``.
    """
    # Imported lazily, as in the surrounding module.
    from nicegui.persistence import FilePersistentDict

    if storage_type == NiceGuiStorageType.General:
        suffix = "general"
    else:
        suffix = f"user-{session_id}"

    storage_file = environment.VELAI_STORAGE_PATH / f"storage-{suffix}.json"
    backing = FilePersistentDict(storage_file, encoding="utf-8")
    # NOTE(review): presumably loads the existing file contents synchronously;
    # confirm against the installed NiceGUI version.
    backing.initialize_sync()

    key_value_store = FilePersistentDictStorage[dict[str, Any]](backing)
    return DocumentStorage(key_value_store)
80
+
81
+
82
  def _build_app_context(session_id: str, client: Client | None) -> AppContext:
83
+ if client is None:
84
+ app_storage = _document_storage_for_session(session_id, NiceGuiStorageType.General)
85
+ user_storage = _document_storage_for_session(session_id, NiceGuiStorageType.User)
86
+ else:
87
+ app_storage = DocumentStorage(NiceGuiKeyValueStorage[dict[str, Any]](NiceGuiStorageType.General))
88
+ user_storage = DocumentStorage(NiceGuiKeyValueStorage[dict[str, Any]](NiceGuiStorageType.User))
89
  blob_storage: BlobStorage = FileSystemBlobStorage(
90
  user_id=session_id, root_dir=environment.VELAI_BLOB_STORAGE_PATH, document_storage=user_storage
91
  )
velai/storage/async_blob_storage.py CHANGED
@@ -117,14 +117,14 @@ class AsyncBlobStorageAdapter:
117
  )
118
  return AsyncBlob(self, blob.meta)
119
 
120
- async def get(self, blob_id: str) -> AsyncBlob | None:
121
- blob = await asyncio.to_thread(self._sync.get, blob_id)
122
  if blob is None:
123
  return None
124
  return AsyncBlob(self, blob.meta)
125
 
126
- async def require(self, blob_id: str) -> AsyncBlob:
127
- blob = await self.get(blob_id)
128
  if blob is None:
129
  raise KeyError(f"Unknown blob_id: {blob_id}")
130
  return blob
 
117
  )
118
  return AsyncBlob(self, blob.meta)
119
 
120
async def get(self, blob_id: str, *, original_name: str | None = None) -> AsyncBlob | None:
    """Look up a blob without blocking the event loop.

    The synchronous ``self._sync.get`` runs in a worker thread via
    ``asyncio.to_thread``; ``original_name`` is forwarded to it unchanged.

    Returns:
        An AsyncBlob built from the found blob's metadata, or None when the
        synchronous storage returns None.
    """
    found = await asyncio.to_thread(self._sync.get, blob_id, original_name=original_name)
    return None if found is None else AsyncBlob(self, found.meta)
125
 
126
async def require(self, blob_id: str, *, original_name: str | None = None) -> AsyncBlob:
    """Return the blob for ``blob_id`` or fail loudly.

    Delegates to ``self.get`` with the same arguments.

    Raises:
        KeyError: when ``self.get`` returns None.
    """
    found = await self.get(blob_id, original_name=original_name)
    if found is not None:
        return found
    raise KeyError(f"Unknown blob_id: {blob_id}")
velai/storage/blob_storage.py CHANGED
@@ -1,6 +1,7 @@
1
  from __future__ import annotations
2
 
3
  import hashlib
 
4
  import os
5
  import uuid
6
  from abc import ABC, abstractmethod
@@ -12,6 +13,8 @@ from .blob_models import Blob, BlobMeta
12
  from .document_storage import DocumentStorage
13
  from .utils import ensure_within_dir, safe_user_dir_name, sanitize_filename, utc_now
14
 
 
 
15
 
16
  class BlobStorage(ABC):
17
  @abstractmethod
@@ -103,6 +106,64 @@ class FileSystemBlobStorage(BlobStorage):
103
  ensure_within_dir(path, user_dir)
104
  return path
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  def _get_meta(self, blob_id: str) -> BlobMeta | None:
107
  index = self._load_index()
108
  blobs = self._get_blob_dict(index)
@@ -123,7 +184,7 @@ class FileSystemBlobStorage(BlobStorage):
123
  ) -> Blob:
124
  original_name_s = sanitize_filename(original_name)
125
  blob_id = str(uuid.uuid4())
126
- storage_name = sanitize_filename(f"{blob_id}_{original_name_s}", max_len=180)
127
 
128
  sha256 = hashlib.sha256(data).hexdigest()
129
  size_bytes = len(data)
@@ -167,11 +228,13 @@ class FileSystemBlobStorage(BlobStorage):
167
 
168
  return Blob(self, meta)
169
 
170
- def get(self, blob_id: str) -> Blob | None:
171
  meta = self._get_meta(blob_id)
172
- if meta is None:
173
- return None
174
- return Blob(self, meta)
 
 
175
 
176
  def list(self) -> list[Blob]:
177
  index = self._load_index()
@@ -186,12 +249,10 @@ class FileSystemBlobStorage(BlobStorage):
186
  return [Blob(self, m) for m in items]
187
 
188
  def _open_by_meta(self, meta: BlobMeta) -> BinaryIO:
189
- path = self._path_for_storage_name(meta.storage_name)
190
- return open(path, "rb")
191
 
192
  def _read_bytes_by_meta(self, meta: BlobMeta) -> bytes:
193
- path = self._path_for_storage_name(meta.storage_name)
194
- with open(path, "rb") as f:
195
  return f.read()
196
 
197
  def update_meta(
 
1
  from __future__ import annotations
2
 
3
  import hashlib
4
+ import mimetypes
5
  import os
6
  import uuid
7
  from abc import ABC, abstractmethod
 
13
  from .document_storage import DocumentStorage
14
  from .utils import ensure_within_dir, safe_user_dir_name, sanitize_filename, utc_now
15
 
16
+ ABSOLUTE_PATH_KEY = "absolute_path"
17
+
18
 
19
  class BlobStorage(ABC):
20
  @abstractmethod
 
106
  ensure_within_dir(path, user_dir)
107
  return path
108
 
109
def _path_from_meta(self, meta: BlobMeta) -> Path:
    """Resolve the on-disk location of the blob described by ``meta``.

    A truthy ``meta.extra[ABSOLUTE_PATH_KEY]`` takes precedence and is
    returned as a Path as-is; otherwise the path is derived from
    ``meta.storage_name`` via ``_path_for_storage_name``.
    """
    override = meta.extra.get(ABSOLUTE_PATH_KEY)
    if not override:
        return self._path_for_storage_name(meta.storage_name)
    # NOTE(review): the stored absolute path is used without a containment check.
    return Path(str(override))
114
+
115
@staticmethod
def storage_name_for(blob_id: str, original_name: str) -> str:
    """Derive the on-disk file name for a blob.

    The original name is sanitized, prefixed with ``<blob_id>_``, and the
    combined string is sanitized again with ``max_len=180``.
    """
    cleaned_name = sanitize_filename(original_name)
    combined = f"{blob_id}_{cleaned_name}"
    return sanitize_filename(combined, max_len=180)
119
+
120
def _blob_from_file(
    self,
    *,
    blob_id: str,
    original_name: str,
    storage_name: str,
    path: Path,
) -> Blob:
    """Build a Blob for a file found on disk that has no index entry.

    Args:
        blob_id: Identifier to record in the metadata.
        original_name: User-facing file name; stored sanitized and used to
            guess the MIME type.
        storage_name: On-disk file name to record in the metadata.
        path: Existing file to describe.

    Returns:
        A Blob whose metadata carries the file size, no checksum
        (``sha256=None``), and the resolved absolute path under
        ``extra[ABSOLUTE_PATH_KEY]``.

    Raises:
        OSError: if ``path.stat()`` fails (e.g. the file disappeared).
    """
    file_stat = path.stat()
    mime_type, _ = mimetypes.guess_type(original_name)
    # Take one timestamp so created_at and updated_at are identical for a
    # freshly built record, rather than differing by two utc_now() calls.
    now = utc_now()
    meta = BlobMeta(
        blob_id=blob_id,
        storage_name=storage_name,
        original_name=sanitize_filename(original_name),
        mime_type=mime_type,
        size_bytes=file_stat.st_size,
        sha256=None,  # the file contents are not read here
        created_at=now,
        updated_at=now,
        extra={ABSOLUTE_PATH_KEY: str(path.resolve())},
    )
    return Blob(self, meta)
142
+
143
def get_by_filename(self, blob_id: str, original_name: str) -> Blob | None:
    """Load a blob from disk when the index is missing or the session changed.

    The expected storage name is rebuilt from ``blob_id`` and
    ``original_name``. The current user's directory is checked first; if the
    file is not there, every directory under the storage root is scanned.

    Returns:
        A Blob for the first matching file, or None when nothing matches or
        the storage root is missing.
    """
    storage_name = self.storage_name_for(blob_id, original_name)

    own_path = self._path_for_storage_name(storage_name)
    if own_path.is_file():
        return self._blob_from_file(
            blob_id=blob_id, original_name=original_name, storage_name=storage_name, path=own_path
        )

    # NOTE(review): the fallback looks in every user's directory, so a caller
    # who knows blob_id + original name can read another user's blob.
    # Confirm this is intended.
    try:
        entries = list(self._root_dir.iterdir())
    except (FileNotFoundError, NotADirectoryError):
        # A missing or invalid storage root means "not found", not an error.
        return None

    for user_dir in entries:
        if not user_dir.is_dir():
            continue
        candidate = user_dir / storage_name
        try:
            ensure_within_dir(candidate, user_dir)
        except ValueError:
            # Candidate would escape this directory; skip it.
            continue
        if candidate.is_file():
            return self._blob_from_file(
                blob_id=blob_id, original_name=original_name, storage_name=storage_name, path=candidate
            )
    return None
166
+
167
  def _get_meta(self, blob_id: str) -> BlobMeta | None:
168
  index = self._load_index()
169
  blobs = self._get_blob_dict(index)
 
184
  ) -> Blob:
185
  original_name_s = sanitize_filename(original_name)
186
  blob_id = str(uuid.uuid4())
187
+ storage_name = self.storage_name_for(blob_id, original_name)
188
 
189
  sha256 = hashlib.sha256(data).hexdigest()
190
  size_bytes = len(data)
 
228
 
229
  return Blob(self, meta)
230
 
231
def get(self, blob_id: str, *, original_name: str | None = None) -> Blob | None:
    """Fetch a blob by id, optionally falling back to a lookup by file name.

    The index is consulted first. Only when it has no entry and
    ``original_name`` is given does the lookup fall back to
    ``get_by_filename``.

    Returns:
        The matching Blob, or None when neither lookup finds it.
    """
    meta = self._get_meta(blob_id)
    if meta is None:
        if original_name is None:
            return None
        return self.get_by_filename(blob_id, original_name)
    return Blob(self, meta)
238
 
239
  def list(self) -> list[Blob]:
240
  index = self._load_index()
 
249
  return [Blob(self, m) for m in items]
250
 
251
  def _open_by_meta(self, meta: BlobMeta) -> BinaryIO:
252
+ return open(self._path_from_meta(meta), "rb")
 
253
 
254
  def _read_bytes_by_meta(self, meta: BlobMeta) -> bytes:
255
+ with open(self._path_from_meta(meta), "rb") as f:
 
256
  return f.read()
257
 
258
  def update_meta(
velai/storage/keyvalue_storage.py CHANGED
@@ -95,3 +95,25 @@ class InMemoryKeyValueStorage(KeyValueStorage[T]):
95
  if prefix is None:
96
  return list(self._data.keys())
97
  return [k for k in self._data.keys() if k.startswith(prefix)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  if prefix is None:
96
  return list(self._data.keys())
97
  return [k for k in self._data.keys() if k.startswith(prefix)]
98
+
99
+
100
class FilePersistentDictStorage(KeyValueStorage[T]):
    """Key-value storage over a NiceGUI on-disk storage dict.

    Lets stateless HTTP handlers read NiceGUI storage files directly. Every
    operation is delegated to the wrapped dict-like object.
    """

    def __init__(self, persistent_dict) -> None:
        # Expected to behave like a mutable mapping (get / item assignment /
        # pop / keys), e.g. nicegui's FilePersistentDict.
        self._persistent = persistent_dict

    def get(self, key: str) -> T | None:
        """Return the value stored under ``key``, or None when absent."""
        stored = self._persistent.get(key)
        return cast(T | None, stored)

    def set(self, key: str, value: T) -> None:
        """Store ``value`` under ``key``, replacing any previous value."""
        self._persistent[key] = value

    def delete(self, key: str) -> None:
        """Remove ``key``; a missing key is silently ignored."""
        self._persistent.pop(key, None)

    def keys(self, prefix: str | None = None) -> list[str]:
        """List all keys, or only those starting with ``prefix`` when given."""
        snapshot = list(self._persistent.keys())
        if prefix is not None:
            return [name for name in snapshot if name.startswith(prefix)]
        return snapshot
velai/storage/storage_endpoint.py CHANGED
@@ -68,7 +68,7 @@ def register():
68
  ctx = await app_context.app_context_from_request(request)
69
 
70
  try:
71
- blob = await ctx.blob_storage.require(blob_id)
72
  except KeyError as exc:
73
  raise HTTPException(status_code=404, detail="blob not found") from exc
74
 
 
68
  ctx = await app_context.app_context_from_request(request)
69
 
70
  try:
71
+ blob = await ctx.blob_storage.require(blob_id, original_name=original_name)
72
  except KeyError as exc:
73
  raise HTTPException(status_code=404, detail="blob not found") from exc
74