olamideba committed on
Commit
c106191
·
1 Parent(s): aa09b44

upload models and embeddings to hf hub

Browse files
.gitignore CHANGED
@@ -17,6 +17,7 @@ venv/
17
 
18
  .chroma/
19
  embeddings/
 
20
  data/
21
  models/
22
  other_models/
 
17
 
18
  .chroma/
19
  embeddings/
20
+ chromadb.tar.gz
21
  data/
22
  models/
23
  other_models/
app/scripts/download_assets.py CHANGED
@@ -8,6 +8,7 @@ import sys
8
  import tarfile
9
  import zipfile
10
  from pathlib import Path
 
11
 
12
  try:
13
  from huggingface_hub import snapshot_download, hf_hub_download
@@ -84,7 +85,8 @@ def download_chromadb(chromadb_repo: str, chromadb_dir: Path, hf_token: str | No
84
  print(f"Checking ChromaDB in {chromadb_dir}...")
85
 
86
  # Check if ChromaDB directory already has content
87
- if chromadb_dir.exists() and any(chromadb_dir.iterdir()):
 
88
  print(f"ChromaDB directory already contains files. Skipping download.")
89
  print(f"To force re-download, delete {chromadb_dir} and restart.")
90
  return
 
8
  import tarfile
9
  import zipfile
10
  from pathlib import Path
11
+ from src.settings import settings
12
 
13
  try:
14
  from huggingface_hub import snapshot_download, hf_hub_download
 
85
  print(f"Checking ChromaDB in {chromadb_dir}...")
86
 
87
  # Check if ChromaDB directory already has content
88
+ expected_chroma_path = chromadb_dir / settings.chroma_db
89
+ if expected_chroma_path.exists() and any(expected_chroma_path.iterdir()):
90
  print(f"ChromaDB directory already contains files. Skipping download.")
91
  print(f"To force re-download, delete {chromadb_dir} and restart.")
92
  return
app/src/settings.py CHANGED
@@ -6,14 +6,6 @@ from pydantic_settings import BaseSettings
6
  load_dotenv(find_dotenv())
7
 
8
 
9
- SRC_DIR: str = os.path.dirname(__file__)
10
- DATA_DIR: str = os.path.join(SRC_DIR, "../../data")
11
- MODELS_DIR: str = os.path.join(SRC_DIR, "../../models")
12
- CHROMA_DIR: str = os.path.join(SRC_DIR, "../../.chroma")
13
- CHROMA_DB: str = os.path.join(CHROMA_DIR, "bge-small-finetuned-chroma")
14
- CHROMA_COLLECTION: str = "bge_small_finetuned_astra_collection" # use bge_small_finetuned_astra_collection for the bege-large model and embeddings
15
-
16
-
17
  class Settings(BaseSettings):
18
  cohere_api_key: str = ""
19
  groq_api_key: str = ""
@@ -23,6 +15,16 @@ class Settings(BaseSettings):
23
  hf_models_repo: str = os.getenv("HF_MODELS_REPO", "")
24
  hf_chromadb_repo: str = os.getenv("HF_CHROMADB_REPO", "")
25
  hf_token: str = os.getenv("HF_TOKEN", "")
 
 
 
26
 
27
 
28
  settings = Settings()
 
 
 
 
 
 
 
 
6
  load_dotenv(find_dotenv())
7
 
8
 
 
 
 
 
 
 
 
 
9
  class Settings(BaseSettings):
10
  cohere_api_key: str = ""
11
  groq_api_key: str = ""
 
15
  hf_models_repo: str = os.getenv("HF_MODELS_REPO", "")
16
  hf_chromadb_repo: str = os.getenv("HF_CHROMADB_REPO", "")
17
  hf_token: str = os.getenv("HF_TOKEN", "")
18
+
19
+ chroma_db: str = os.getenv("CHROMA_DB", "")
20
+ chroma_collection: str = os.getenv("CHROMA_COLLECTION", "")
21
 
22
 
23
  settings = Settings()
24
+
25
+ SRC_DIR: str = os.path.dirname(__file__)
26
+ DATA_DIR: str = os.path.join(SRC_DIR, "../../data")
27
+ MODELS_DIR: str = os.path.join(SRC_DIR, "../../models")
28
+ CHROMA_DIR: str = os.path.join(SRC_DIR, "../../.chroma")
29
+ CHROMA_DB: str = os.path.join(CHROMA_DIR, settings.chroma_db)
30
+ CHROMA_COLLECTION: str = settings.chroma_collection