Bio_Bert / Scibert
Hemanshu121's picture
Create Scibert
9ad0b8b verified
# download_scibert.py
# This script downloads and loads the SciBERT model from Hugging Face.
from transformers import AutoTokenizer, AutoModel
import torch
def load_scibert(model_name: str = "allenai/scibert_scivocab_uncased") -> tuple:
    """Load a SciBERT tokenizer and model from Hugging Face.

    Args:
        model_name: Hub identifier (or local path) of the checkpoint to load.
            Defaults to the uncased SciBERT checkpoint with the scientific
            vocabulary, ``allenai/scibert_scivocab_uncased``.

    Returns:
        A ``(tokenizer, model)`` tuple as produced by
        ``AutoTokenizer.from_pretrained`` and ``AutoModel.from_pretrained``.

    Raises:
        Whatever ``from_pretrained`` raises when the checkpoint cannot be
        resolved or loaded; no error is swallowed here.
    """
    print(f"🔽 Downloading and loading model: {model_name}")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Load model. torch_dtype="auto" asks transformers to pick the weight
    # dtype from the checkpoint instead of forcing a fixed one; it does not
    # choose or move the model to a device.
    model = AutoModel.from_pretrained(model_name, torch_dtype="auto")

    print("✅ SciBERT model and tokenizer successfully loaded!")
    return tokenizer, model
def main():
    """Smoke-test the loaded model by encoding one sentence.

    Loads the tokenizer and model via ``load_scibert()``, runs a single
    forward pass on a fixed example sentence, and prints the shape of
    ``last_hidden_state``.
    """
    tokenizer, model = load_scibert()

    # Test encoding
    text = "Artificial intelligence is revolutionizing scientific research."
    inputs = tokenizer(text, return_tensors="pt")

    # Inference only: disable gradient tracking for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    print("🔍 Model output shape:", outputs.last_hidden_state.shape)


if __name__ == "__main__":
    main()