# download_scibert.py
# Downloads and loads the SciBERT model from Hugging Face, then runs a
# quick encoding smoke test when executed as a script.
from transformers import AutoTokenizer, AutoModel
import torch


def load_scibert(model_name: str = "allenai/scibert_scivocab_uncased"):
    """
    Download (if not cached) and load a Hugging Face model and tokenizer.

    Args:
        model_name: Hugging Face hub identifier. Defaults to SciBERT
            (allenai/scibert_scivocab_uncased), preserving the original
            behavior for existing callers.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    print(f"🔽 Downloading and loading model: {model_name}")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # torch_dtype="auto" loads weights in the dtype stored in the checkpoint
    # (it does NOT move the model to any device — that still requires .to(...)).
    model = AutoModel.from_pretrained(model_name, torch_dtype="auto")

    print("✅ SciBERT model and tokenizer successfully loaded!")
    return tokenizer, model


def main() -> None:
    """Load SciBERT and encode one sentence as a smoke test."""
    tokenizer, model = load_scibert()

    # Test encoding
    text = "Artificial intelligence is revolutionizing scientific research."
    inputs = tokenizer(text, return_tensors="pt")

    # no_grad: pure inference, skip autograd bookkeeping
    with torch.no_grad():
        outputs = model(**inputs)

    print("🔍 Model output shape:", outputs.last_hidden_state.shape)


if __name__ == "__main__":
    main()