""" Preload all models into HuggingFace Hub cache at Docker build time. This avoids cold-start downloads on the first request in production. """ from transformers import ( AutoFeatureExtractor, AutoModelForAudioClassification, AutoModelForSequenceClassification, AutoModelForImageClassification, AutoTokenizer, ) import sys MODEL_GROUPS = { "Audio": [ ("AutoFeatureExtractor", "MelodyMachine/Deepfake-audio-detection-V2"), ("AutoModelForAudioClassification", "MelodyMachine/Deepfake-audio-detection-V2"), ], "Text": [ ("AutoTokenizer", "fakespot-ai/roberta-base-ai-text-detection-v1"), ("AutoModelForSequenceClassification", "fakespot-ai/roberta-base-ai-text-detection-v1"), ("AutoTokenizer", "Hello-SimpleAI/chatgpt-detector-roberta"), ("AutoModelForSequenceClassification", "Hello-SimpleAI/chatgpt-detector-roberta"), ("AutoTokenizer", "vikram71198/distilroberta-base-finetuned-fake-news-detection"), ("AutoModelForSequenceClassification", "vikram71198/distilroberta-base-finetuned-fake-news-detection"), ("AutoTokenizer", "jy46604790/Fake-News-Bert-Detect"), ("AutoModelForSequenceClassification", "jy46604790/Fake-News-Bert-Detect"), ], "Image": [ ("AutoModelForImageClassification", "Ateeqq/ai-vs-human-image-detector"), ("AutoModelForImageClassification", "prithivMLmods/AI-vs-Deepfake-vs-Real"), ("AutoModelForImageClassification", "prithivMLmods/Deep-Fake-Detector-Model"), ], } LOADERS = { "AutoFeatureExtractor": AutoFeatureExtractor, "AutoModelForAudioClassification": AutoModelForAudioClassification, "AutoModelForSequenceClassification": AutoModelForSequenceClassification, "AutoModelForImageClassification": AutoModelForImageClassification, "AutoTokenizer": AutoTokenizer, } errors = [] for group, models in MODEL_GROUPS.items(): print(f"\n── {group} ──") for loader_name, model_name in models: try: print(f" Downloading {model_name} ({loader_name})...", end=" ", flush=True) LOADERS[loader_name].from_pretrained(model_name) print("OK") except Exception as e: print(f"FAILED: {e}") errors.append((model_name, str(e))) if errors: print(f"\n⚠️ {len(errors)} model(s) failed to preload (will download on first request):") for name, err in errors: print(f" - {name}: {err}") else: print("\nAll models preloaded successfully.")