Spaces:
Running
Running
| """Download models from Hugging Face Hub during Docker build""" | |
| import os | |
| import time | |
| import shutil | |
| from huggingface_hub import snapshot_download, login, hf_hub_download | |
def download_models(dst_base="/app/models"):
    """Download model artifacts from the Hugging Face Hub during Docker build.

    Fetches a filtered snapshot of the model repository (with retries and
    linear backoff) and copies the artifacts into the runtime model directory.

    Args:
        dst_base: Destination directory for the copied models. Defaults to
            the container path ``/app/models``.

    Returns:
        True when the snapshot was downloaded and all present artifacts were
        copied; False when every retry attempt failed (the caller treats this
        as "use fallback models" — failure is deliberately non-fatal).
    """
    token = os.environ.get("HF_TOKEN")
    if token:
        login(token=token)
        print("Logged in to Hugging Face")

    repo_id = "ridzki-nrzngr/gapura-ai-models"
    max_retries = 3
    for attempt in range(max_retries):
        try:
            print(f"Downloading models (attempt {attempt + 1}/{max_retries})...")
            cache_dir = snapshot_download(
                repo_id=repo_id,
                token=token,
                # NOTE(review): resume_download is deprecated (a no-op) in
                # newer huggingface_hub releases; kept because on older
                # versions it enables resuming partial downloads.
                resume_download=True,
                etag_timeout=30,
                # Serial download: more robust on flaky build-time networks.
                max_workers=1,
                allow_patterns=[
                    "regression/*",
                    "multi_task_transformer.onnx",
                    "models/nlp/severity_bert/*",
                    "models/nlp/severity_classifier/*",
                ],
            )
            print(f"Models downloaded to: {cache_dir}")

            _copy_regression_models(cache_dir, dst_base)
            _copy_transformer_onnx(cache_dir, dst_base)
            # Fine-tuned BERT model and TF-IDF fallback classifier.
            _copy_model_dir(cache_dir, dst_base, "nlp", "severity_bert")
            _copy_model_dir(cache_dir, dst_base, "nlp", "severity_classifier")

            print("All models downloaded and copied successfully")
            return True
        except Exception as e:
            # Broad catch is intentional: any download/copy failure should
            # trigger a retry rather than abort the image build outright.
            print(f"Download failed (attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                wait_time = (attempt + 1) * 10  # linear backoff: 10s, 20s
                print(f"Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                print("All retries failed, will use fallback models")
                return False
    return False


def _copy_regression_models(cache_dir, dst_base):
    """Copy whichever of the known regression artifacts exist in the snapshot."""
    regression_files = [
        "resolution_predictor_latest.pkl",
        "resolution_predictor.ubj",
        "resolution_predictor.onnx",
        "scaler.json",
        "feature_names.json",
        "label_encoders.json",
        "anomaly_stats.pkl",
    ]
    os.makedirs(os.path.join(dst_base, "regression"), exist_ok=True)
    for f in regression_files:
        src = os.path.join(cache_dir, "regression", f)
        if os.path.exists(src):
            shutil.copy2(src, os.path.join(dst_base, "regression", f))
            print(f"Copied: regression/{f}")


def _copy_transformer_onnx(cache_dir, dst_base):
    """Copy the multi-task transformer ONNX model if present.

    The snapshot pattern requests ``multi_task_transformer.onnx`` at the repo
    root, but the original copy step looked only under ``models/`` — check
    both locations so the file is found wherever the repo stores it.
    """
    candidates = (
        os.path.join(cache_dir, "multi_task_transformer.onnx"),
        os.path.join(cache_dir, "models", "multi_task_transformer.onnx"),
    )
    for onnx_src in candidates:
        if os.path.exists(onnx_src):
            shutil.copy2(
                onnx_src, os.path.join(dst_base, "multi_task_transformer.onnx")
            )
            print("Copied: multi_task_transformer.onnx")
            return


def _copy_model_dir(cache_dir, dst_base, *parts):
    """Copy every regular file from ``models/<parts...>`` in the snapshot
    into ``<dst_base>/<parts...>``, creating the destination as needed.

    Missing source directories are skipped silently (the artifact is
    optional; the app falls back to other models).
    """
    src_dir = os.path.join(cache_dir, "models", *parts)
    if not os.path.isdir(src_dir):
        return
    dst_dir = os.path.join(dst_base, *parts)
    os.makedirs(dst_dir, exist_ok=True)
    rel = "/".join(parts)
    for f in os.listdir(src_dir):
        src_file = os.path.join(src_dir, f)
        if os.path.isfile(src_file):
            shutil.copy2(src_file, os.path.join(dst_dir, f))
            print(f"Copied: {rel}/{f}")
| if __name__ == "__main__": | |
| download_models() | |