# gapura-oneclick/scripts/download_models.py
# Author: Muhammad Ridzki Nugraha — "Deploy API and config (Batch 3)", commit 07476a1
"""Download models from Hugging Face Hub during Docker build"""
import os
import time
import shutil
from huggingface_hub import snapshot_download, login, hf_hub_download
def _copy_dir_files(src_dir, dst_dir, label):
    """Copy every regular file from *src_dir* into *dst_dir*.

    Creates *dst_dir* if needed and prints one "Copied: label/name" line per
    file.  Silently does nothing when *src_dir* does not exist (the model
    repo may not contain every optional artifact).
    """
    if not os.path.exists(src_dir):
        return
    os.makedirs(dst_dir, exist_ok=True)
    for name in os.listdir(src_dir):
        src_file = os.path.join(src_dir, name)
        if os.path.isfile(src_file):
            shutil.copy2(src_file, os.path.join(dst_dir, name))
            print(f"Copied: {label}/{name}")


def download_models():
    """Download model artifacts from the Hugging Face Hub into /app/models.

    Authenticates with the HF_TOKEN environment variable when present,
    snapshots the model repo (with up to 3 attempts and linear backoff),
    then copies the expected artifacts into the image's /app/models tree.

    Returns:
        bool: True when the download and copy succeeded, False when every
        retry failed (callers treat False as "use fallback models").
    """
    token = os.environ.get("HF_TOKEN")
    if token:
        login(token=token)
        print("Logged in to Hugging Face")

    repo_id = "ridzki-nrzngr/gapura-ai-models"
    max_retries = 3

    for attempt in range(max_retries):
        try:
            print(f"Downloading models (attempt {attempt + 1}/{max_retries})...")
            # NOTE: resume_download=True was removed — it is deprecated in
            # huggingface_hub (downloads always resume since v0.19) and only
            # emitted a FutureWarning.
            cache_dir = snapshot_download(
                repo_id=repo_id,
                token=token,
                etag_timeout=30,
                max_workers=1,
                allow_patterns=[
                    "regression/*",
                    "multi_task_transformer.onnx",
                    "models/nlp/severity_bert/*",
                    "models/nlp/severity_classifier/*",
                ],
            )
            print(f"Models downloaded to: {cache_dir}")

            dst_base = "/app/models"

            # Copy regression models (flat list of known artifact names).
            regression_files = [
                "resolution_predictor_latest.pkl",
                "resolution_predictor.ubj",
                "resolution_predictor.onnx",
                "scaler.json",
                "feature_names.json",
                "label_encoders.json",
                "anomaly_stats.pkl",
            ]
            os.makedirs(os.path.join(dst_base, "regression"), exist_ok=True)
            for f in regression_files:
                src = os.path.join(cache_dir, "regression", f)
                dst = os.path.join(dst_base, "regression", f)
                if os.path.exists(src):
                    shutil.copy2(src, dst)
                    print(f"Copied: regression/{f}")

            # Copy multi-task transformer.
            # BUG FIX: allow_patterns fetches "multi_task_transformer.onnx" at
            # the repo ROOT, but the original code looked for it only under
            # "models/" — a layout the snapshot never downloads.  Probe both
            # locations so either repo layout works (repo layout unverified —
            # confirm against the actual Hub repo).
            for onnx_src in (
                os.path.join(cache_dir, "multi_task_transformer.onnx"),
                os.path.join(cache_dir, "models", "multi_task_transformer.onnx"),
            ):
                if os.path.exists(onnx_src):
                    shutil.copy2(
                        onnx_src, os.path.join(dst_base, "multi_task_transformer.onnx")
                    )
                    print("Copied: multi_task_transformer.onnx")
                    break

            # Copy severity_bert (fine-tuned model).
            _copy_dir_files(
                os.path.join(cache_dir, "models", "nlp", "severity_bert"),
                os.path.join(dst_base, "nlp", "severity_bert"),
                "nlp/severity_bert",
            )

            # Copy severity_classifier (TF-IDF fallback).
            _copy_dir_files(
                os.path.join(cache_dir, "models", "nlp", "severity_classifier"),
                os.path.join(dst_base, "nlp", "severity_classifier"),
                "nlp/severity_classifier",
            )

            print("All models downloaded and copied successfully")
            return True
        except Exception as e:
            # Broad catch is deliberate: any failure (network, auth, disk)
            # should trigger a retry, and ultimately a soft failure so the
            # Docker build can fall back to bundled models.
            print(f"Download failed (attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                wait_time = (attempt + 1) * 10  # linear backoff: 10s, 20s
                print(f"Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                print("All retries failed, will use fallback models")
                return False
    return False
# Script entry point.  The boolean result is deliberately NOT converted into
# a non-zero exit code: a failed download prints a warning and the Docker
# build continues, falling back to bundled models.
if __name__ == "__main__":
    download_models()