upload models and embeddings to hf hub
Browse files
- .gitignore +1 -0
- app/scripts/download_assets.py +3 -1
- app/src/settings.py +10 -8
.gitignore
CHANGED
|
@@ -17,6 +17,7 @@ venv/
|
|
| 17 |
|
| 18 |
.chroma/
|
| 19 |
embeddings/
|
|
|
|
| 20 |
data/
|
| 21 |
models/
|
| 22 |
other_models/
|
|
|
|
| 17 |
|
| 18 |
.chroma/
|
| 19 |
embeddings/
|
| 20 |
+
chromadb.tar.gz
|
| 21 |
data/
|
| 22 |
models/
|
| 23 |
other_models/
|
app/scripts/download_assets.py
CHANGED
|
@@ -8,6 +8,7 @@ import sys
|
|
| 8 |
import tarfile
|
| 9 |
import zipfile
|
| 10 |
from pathlib import Path
|
|
|
|
| 11 |
|
| 12 |
try:
|
| 13 |
from huggingface_hub import snapshot_download, hf_hub_download
|
|
@@ -84,7 +85,8 @@ def download_chromadb(chromadb_repo: str, chromadb_dir: Path, hf_token: str | No
|
|
| 84 |
print(f"Checking ChromaDB in {chromadb_dir}...")
|
| 85 |
|
| 86 |
# Check if ChromaDB directory already has content
|
| 87 |
-
|
|
|
|
| 88 |
print(f"ChromaDB directory already contains files. Skipping download.")
|
| 89 |
print(f"To force re-download, delete {chromadb_dir} and restart.")
|
| 90 |
return
|
|
|
|
| 8 |
import tarfile
|
| 9 |
import zipfile
|
| 10 |
from pathlib import Path
|
| 11 |
+
from src.settings import settings
|
| 12 |
|
| 13 |
try:
|
| 14 |
from huggingface_hub import snapshot_download, hf_hub_download
|
|
|
|
| 85 |
print(f"Checking ChromaDB in {chromadb_dir}...")
|
| 86 |
|
| 87 |
# Check if ChromaDB directory already has content
|
| 88 |
+
expected_chroma_path = chromadb_dir / settings.chroma_db
|
| 89 |
+
if expected_chroma_path.exists() and any(expected_chroma_path.iterdir()):
|
| 90 |
print(f"ChromaDB directory already contains files. Skipping download.")
|
| 91 |
print(f"To force re-download, delete {chromadb_dir} and restart.")
|
| 92 |
return
|
app/src/settings.py
CHANGED
|
@@ -6,14 +6,6 @@ from pydantic_settings import BaseSettings
|
|
| 6 |
load_dotenv(find_dotenv())
|
| 7 |
|
| 8 |
|
| 9 |
-
SRC_DIR: str = os.path.dirname(__file__)
|
| 10 |
-
DATA_DIR: str = os.path.join(SRC_DIR, "../../data")
|
| 11 |
-
MODELS_DIR: str = os.path.join(SRC_DIR, "../../models")
|
| 12 |
-
CHROMA_DIR: str = os.path.join(SRC_DIR, "../../.chroma")
|
| 13 |
-
CHROMA_DB: str = os.path.join(CHROMA_DIR, "bge-small-finetuned-chroma")
|
| 14 |
-
CHROMA_COLLECTION: str = "bge_small_finetuned_astra_collection" # use bge_small_finetuned_astra_collection for the bege-large model and embeddings
|
| 15 |
-
|
| 16 |
-
|
| 17 |
class Settings(BaseSettings):
|
| 18 |
cohere_api_key: str = ""
|
| 19 |
groq_api_key: str = ""
|
|
@@ -23,6 +15,16 @@ class Settings(BaseSettings):
|
|
| 23 |
hf_models_repo: str = os.getenv("HF_MODELS_REPO", "")
|
| 24 |
hf_chromadb_repo: str = os.getenv("HF_CHROMADB_REPO", "")
|
| 25 |
hf_token: str = os.getenv("HF_TOKEN", "")
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
settings = Settings()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
load_dotenv(find_dotenv())
|
| 7 |
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
class Settings(BaseSettings):
|
| 10 |
cohere_api_key: str = ""
|
| 11 |
groq_api_key: str = ""
|
|
|
|
| 15 |
hf_models_repo: str = os.getenv("HF_MODELS_REPO", "")
|
| 16 |
hf_chromadb_repo: str = os.getenv("HF_CHROMADB_REPO", "")
|
| 17 |
hf_token: str = os.getenv("HF_TOKEN", "")
|
| 18 |
+
|
| 19 |
+
chroma_db: str = os.getenv("CHROMA_DB", "")
|
| 20 |
+
chroma_collection: str = os.getenv("CHROMA_COLLECTION", "")
|
| 21 |
|
| 22 |
|
| 23 |
settings = Settings()
|
| 24 |
+
|
| 25 |
+
SRC_DIR: str = os.path.dirname(__file__)
|
| 26 |
+
DATA_DIR: str = os.path.join(SRC_DIR, "../../data")
|
| 27 |
+
MODELS_DIR: str = os.path.join(SRC_DIR, "../../models")
|
| 28 |
+
CHROMA_DIR: str = os.path.join(SRC_DIR, "../../.chroma")
|
| 29 |
+
CHROMA_DB: str = os.path.join(CHROMA_DIR, settings.chroma_db)
|
| 30 |
+
CHROMA_COLLECTION: str = settings.chroma_collection
|