import os

import torch
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Hugging Face access token from a local .env file.
load_dotenv()
hf_token = os.getenv("HF_TOKEN")

model_id = "google/medgemma-1.5-4b-it"

# Print a redacted view of the token so the full secret is never logged.
if hf_token:
    print(f"Testing HF_TOKEN: {hf_token[:5]}...{hf_token[-5:]}")
else:
    print("Testing HF_TOKEN: None")
print(f"Model ID: {model_id}")

try:
    print("Attempting to load tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    print("Tokenizer loaded successfully.")

    print("Attempting to load model weights...")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=hf_token,
        torch_dtype=torch.bfloat16,
        device_map="cpu",
        low_cpu_mem_usage=True,
    )
    print("Model loaded successfully.")
except Exception as e:
    print(f"DIAGNOSTIC FAILURE: {e}")