Hitakshi26 committed on
Commit
7921d06
·
1 Parent(s): 54c31f0

Fixing Storage

Browse files
Files changed (2) hide show
  1. src/storage/chroma_store.py +21 -7
  2. src/storage/paths.py +10 -25
src/storage/chroma_store.py CHANGED
@@ -1,13 +1,27 @@
1
  import os
2
  import chromadb
3
- from chromadb.config import Settings
4
  from src.storage.paths import nb_root
5
 
6
- def chroma_client(username: str, notebook_id: str):
7
- persist_dir = os.path.join(nb_root(username, notebook_id), "chroma")
8
- os.makedirs(persist_dir, exist_ok=True)
9
- return chromadb.PersistentClient(path=persist_dir, settings=Settings(anonymized_telemetry=False))
10
 
11
- def get_collection(username: str, notebook_id: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  client = chroma_client(username, notebook_id)
13
- return client.get_or_create_collection(name="docs")
 
 
 
 
1
  import os
2
  import chromadb
 
3
  from src.storage.paths import nb_root
4
 
 
 
 
 
5
 
6
+ def chroma_client(username, notebook_id):
7
+
8
+ persist_dir = os.path.join(
9
+ nb_root(username, notebook_id),
10
+ "chroma"
11
+ )
12
+
13
+ return chromadb.Client(
14
+ chromadb.config.Settings(
15
+ persist_directory=persist_dir,
16
+ anonymized_telemetry=False
17
+ )
18
+ )
19
+
20
+
21
+ def get_collection(username, notebook_id):
22
+
23
  client = chroma_client(username, notebook_id)
24
+
25
+ return client.get_or_create_collection(
26
+ name="notebook"
27
+ )
src/storage/paths.py CHANGED
@@ -1,33 +1,18 @@
1
  import os
2
- from pathlib import Path
3
 
4
- # If DATA_ROOT env var is not set:
5
- # - Locally: write to ./data (project folder)
6
- # - On HF: you will set DATA_ROOT=/data in Space variables (or leave it as /data there)
7
- DEFAULT_LOCAL_DATA = str(Path(__file__).resolve().parents[2] / "data")
8
 
9
- DATA_ROOT = os.environ.get("DATA_ROOT", DEFAULT_LOCAL_DATA)
 
10
 
11
- def user_root(username: str) -> str:
12
- return os.path.join(DATA_ROOT, "users", username, "notebooks")
13
-
14
- def index_path(username: str) -> str:
15
- return os.path.join(user_root(username), "index.json")
16
-
17
- def nb_root(username: str, notebook_id: str) -> str:
18
  return os.path.join(user_root(username), notebook_id)
19
 
20
  def ensure_tree(username: str, notebook_id: str):
 
21
  base = nb_root(username, notebook_id)
22
- paths = [
23
- user_root(username),
24
- os.path.join(base, "files_raw"),
25
- os.path.join(base, "files_extracted"),
26
- os.path.join(base, "chroma"),
27
- os.path.join(base, "chat"),
28
- os.path.join(base, "artifacts", "reports"),
29
- os.path.join(base, "artifacts", "quizzes"),
30
- os.path.join(base, "artifacts", "podcasts"),
31
- ]
32
- for p in paths:
33
- os.makedirs(p, exist_ok=True)
 
1
  import os
 
2
 
3
+ DATA_ROOT = os.getenv("DATA_ROOT", "./data")
 
 
 
4
 
5
+ def user_root(username: str):
6
+ return os.path.join(DATA_ROOT, username)
7
 
8
+ def nb_root(username: str, notebook_id: str):
 
 
 
 
 
 
9
  return os.path.join(user_root(username), notebook_id)
10
 
11
  def ensure_tree(username: str, notebook_id: str):
12
+
13
  base = nb_root(username, notebook_id)
14
+
15
+ os.makedirs(base, exist_ok=True)
16
+ os.makedirs(os.path.join(base, "files_raw"), exist_ok=True)
17
+ os.makedirs(os.path.join(base, "files_extracted"), exist_ok=True)
18
+ os.makedirs(os.path.join(base, "artifacts"), exist_ok=True)