rajpurkar/squad
Viewer • Updated • 98.2k • 186k • 368
This is the OpenLLM small model, trained for 10,000 steps on the SQuAD dataset.
This model uses a custom configuration format and requires the OpenLLM framework to load properly.
# Load using the OpenLLM framework.
#
# End-to-end example: read the model configuration, build the model, restore
# the trained weights, load the SentencePiece tokenizer, and generate a
# continuation for a prompt. Expects "config.json", "pytorch_model.bin" and
# "tokenizer.model" to be present in the current working directory.
import json

import sentencepiece as spm
import torch

from core.src.model import GPTModel

# Load configuration
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# Create model instance
model = GPTModel(config["model_config"])

# Load trained weights.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
state_dict = torch.load(
    "pytorch_model.bin",
    map_location="cpu",
    weights_only=True,
)
model.load_state_dict(state_dict)

# A newly constructed module starts in training mode; switch to eval mode so
# train-only layers (e.g. dropout) do not perturb generation.
model.eval()

# Load tokenizer
tokenizer = spm.SentencePieceProcessor()
tokenizer.load("tokenizer.model")

# Generate text
prompt = "The future of artificial intelligence"
tokens = tokenizer.encode(prompt)
# Wrapping the token list in another list adds a leading batch dimension of 1.
inputs = torch.tensor([tokens], dtype=torch.long)

# Gradients are not needed for inference.
with torch.no_grad():
    outputs = model.generate(
        inputs,
        max_length=100,
        temperature=0.7,
    )

# outputs[0] is the first (and here only) sequence of the batch.
generated_text = tokenizer.decode(outputs[0].tolist())
print(generated_text)
# Alternative: load the model and tokenizer through the custom loader script.
#
# `torch` is imported here so the snippet runs when copied on its own.
import torch

from load_hf_model import load_model_and_tokenizer

# Load model using custom loader
model, tokenizer = load_model_and_tokenizer("lemms/openllm-small-extended-10k")
# NOTE(review): confirm the loader returns the model in eval mode; if it does
# not, call model.eval() before generating.

# Generate text
prompt = "The history of machine learning"
tokens = tokenizer.encode(prompt)
# Wrapping the token list in another list adds a leading batch dimension of 1.
inputs = torch.tensor([tokens], dtype=torch.long)

# Gradients are not needed for inference.
with torch.no_grad():
    outputs = model.generate(
        inputs,
        max_length=100,
        temperature=0.7,
    )

# outputs[0] is the first (and here only) sequence of the batch.
print(tokenizer.decode(outputs[0].tolist()))
This model follows the standard GPT architecture:
The model was trained using:
This model is dual-licensed:
If you use this model in your research, please cite:
@misc{openllm2024,
title={OpenLLM: Open Source Large Language Model Framework},
author={Louis Chua Bean Chong},
year={2024},
url={https://github.com/louischua/openllm}
}