meet4150/ALIV_AI / scripts /download_models.py
download
raw
1.92 kB
from __future__ import annotations
import subprocess
from pathlib import Path
# Repository root: this script is expected to live one directory below it.
PROJECT_ROOT = Path(__file__).resolve().parents[1]

# All mirrored model repositories are stored beneath this directory.
MODELS_DIR = PROJECT_ROOT.joinpath("models")

# Maps each Hugging Face repo id to the repo-relative files to mirror locally.
# Entries are downloaded in the order listed.
MODEL_FILES = {
    "BAAI/bge-base-en-v1.5": [
        ".gitattributes",
        "config.json",
        "config_sentence_transformers.json",
        "modules.json",
        "sentence_bert_config.json",
        "special_tokens_map.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "vocab.txt",
        "1_Pooling/config.json",
        "model.safetensors",
    ],
    "sentence-transformers/all-MiniLM-L6-v2": [
        ".gitattributes",
        "config.json",
        "config_sentence_transformers.json",
        "data_config.json",
        "modules.json",
        "sentence_bert_config.json",
        "special_tokens_map.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "vocab.txt",
        "1_Pooling/config.json",
        "model.safetensors",
    ],
}
def model_dir_name(repo_id: str) -> str:
    """Return a directory-safe name for *repo_id*.

    Every ``/`` in the repo id is replaced with a double underscore, so an
    ``owner/model`` id becomes the single path component ``owner__model``.
    """
    segments = repo_id.split("/")
    return "__".join(segments)
def download_file(repo_id: str, relative_path: str, destination: Path) -> None:
    """Download one file from a Hugging Face repo unless it already exists.

    The file is fetched with ``curl`` from the repo's ``main`` revision. It is
    first written to a sibling ``<name>.part`` file and only renamed onto
    *destination* once curl has exited successfully, so an interrupted or
    failed transfer never leaves a truncated file that a later run would
    mistake for a complete download and skip.

    Args:
        repo_id: Hugging Face repository id, e.g. ``"owner/model"``.
        relative_path: Path of the file inside the repository.
        destination: Local path the file should end up at. Missing parent
            directories are created.

    Raises:
        subprocess.CalledProcessError: If curl exits with a non-zero status.
    """
    destination.parent.mkdir(parents=True, exist_ok=True)
    if destination.exists():
        print(f"Skipping existing file: {destination}")
        return

    url = f"https://huggingface.co/{repo_id}/resolve/main/{relative_path}"
    print(f"Downloading {repo_id}/{relative_path}")

    # Same directory as the destination so the final rename stays on one
    # filesystem and is atomic.
    partial = destination.with_name(destination.name + ".part")
    try:
        subprocess.run(
            ["curl", "-L", "--fail", url, "-o", str(partial)],
            check=True,
        )
        partial.replace(destination)
    finally:
        # After a successful rename the temp file is gone; on any failure
        # (including Ctrl-C) remove whatever curl managed to write.
        if partial.exists():
            partial.unlink()
def main() -> None:
    """Mirror every file listed in ``MODEL_FILES`` beneath ``MODELS_DIR``.

    Each repository gets its own sub-directory, named by ``model_dir_name``.
    Nothing is caught here, so the success message is printed only when every
    ``download_file`` call has returned without raising.
    """
    MODELS_DIR.mkdir(parents=True, exist_ok=True)

    for repo_id in MODEL_FILES:
        mirror_root = MODELS_DIR / model_dir_name(repo_id)
        for filename in MODEL_FILES[repo_id]:
            download_file(repo_id, filename, mirror_root.joinpath(filename))

    print("Local model mirrors downloaded successfully.")


if __name__ == "__main__":
    main()

Xet Storage Details

Size:
1.92 kB
·
Xet hash:
0016090ec7f4ff0482ca26584ef9054901cf083c7f0c077b6649144610569935

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.