Buckets:

meet4150
/

ALIV_AI

meet4150/ALIV_AI/scripts/download_models.py

1.92 kB

	from __future__ import annotations

	import subprocess
	from pathlib import Path


	# Directory two levels above this file's resolved location (the parent of
	# the directory that contains this script).
	PROJECT_ROOT = Path(__file__).resolve().parents[1]
	# Every model mirror is written beneath this directory.
	MODELS_DIR = PROJECT_ROOT.joinpath("models")

	# Maps each Hugging Face repo id to the repo-relative paths of the files
	# to mirror locally. Files are fetched in the order listed here.
	MODEL_FILES = {
	    "BAAI/bge-base-en-v1.5": [
	        ".gitattributes",
	        "config.json",
	        "config_sentence_transformers.json",
	        "modules.json",
	        "sentence_bert_config.json",
	        "special_tokens_map.json",
	        "tokenizer.json",
	        "tokenizer_config.json",
	        "vocab.txt",
	        "1_Pooling/config.json",
	        "model.safetensors",
	    ],
	    # Same layout as above, plus "data_config.json".
	    "sentence-transformers/all-MiniLM-L6-v2": [
	        ".gitattributes",
	        "config.json",
	        "config_sentence_transformers.json",
	        "data_config.json",
	        "modules.json",
	        "sentence_bert_config.json",
	        "special_tokens_map.json",
	        "tokenizer.json",
	        "tokenizer_config.json",
	        "vocab.txt",
	        "1_Pooling/config.json",
	        "model.safetensors",
	    ],
	}


	def model_dir_name(repo_id: str) -> str:
	    """Return *repo_id* with every "/" replaced by "__".

	    The result contains no path separators, so it can be used as a single
	    directory name (e.g. ``"org/model"`` becomes ``"org__model"``).
	    """
	    segments = repo_id.split("/")
	    return "__".join(segments)


	def download_file(repo_id: str, relative_path: str, destination: Path) -> None:
	    """Fetch one file of a Hugging Face repo with curl unless it already exists.

	    The file is first written to a sibling ``<name>.part`` path and renamed
	    onto *destination* only after curl succeeds. An interrupted or failed
	    transfer therefore never leaves a truncated file at *destination*, which
	    the existence check would otherwise skip on every later run.

	    Args:
	        repo_id: Repository id used in the URL, e.g. ``"org/model"``.
	        relative_path: Path of the file inside the repository.
	        destination: Local path the file is written to; missing parent
	            directories are created.

	    Raises:
	        subprocess.CalledProcessError: If curl exits with a non-zero status.
	        FileNotFoundError: If the ``curl`` executable cannot be found.
	    """
	    destination.parent.mkdir(parents=True, exist_ok=True)
	    if destination.exists():
	        print(f"Skipping existing file: {destination}")
	        return

	    url = f"https://huggingface.co/{repo_id}/resolve/main/{relative_path}"
	    # Stage next to the target so the final rename stays on one filesystem.
	    partial = destination.with_name(destination.name + ".part")
	    print(f"Downloading {repo_id}/{relative_path}")
	    try:
	        subprocess.run(
	            ["curl", "-L", "--fail", url, "-o", str(partial)],
	            check=True,
	        )
	        partial.replace(destination)
	    finally:
	        # After a successful rename the staging file is gone and this is a
	        # no-op; after a failure or interrupt it removes the leftover.
	        if partial.exists():
	            partial.unlink()


	def main() -> None:
	    """Mirror every file listed in MODEL_FILES beneath MODELS_DIR.

	    Each repository gets its own sub-directory named by ``model_dir_name``;
	    files keep their repo-relative paths inside it. A confirmation line is
	    printed once every download call has returned.
	    """
	    MODELS_DIR.mkdir(parents=True, exist_ok=True)

	    for repo, filenames in MODEL_FILES.items():
	        mirror_root = MODELS_DIR / model_dir_name(repo)
	        for name in filenames:
	            download_file(repo, name, mirror_root.joinpath(name))

	    print("Local model mirrors downloaded successfully.")


	# Run the download only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()

Xet Storage Details

Size: 1.92 kB
Xet hash: 0016090ec7f4ff0482ca26584ef9054901cf083c7f0c077b6649144610569935

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.