Upload improved HarryPotterGPT with better compatibility
Browse files- README.md +42 -0
- card-metadata.json +13 -0
- config.json +45 -0
- loading_helper.py +20 -0
- merges.txt +2 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +6 -0
- tokenizer.model +3 -0
- tokenizer.vocab +0 -0
- tokenizer_config.json +9 -0
- vocab.json +0 -0
README.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HarryPotterGPT
|
| 2 |
+
|
| 3 |
+
A GPT model trained on Harry Potter books, created by [Camilo Vega](https://www.linkedin.com/in/camilo-vega-169084b1/), AI Consultant and Professor. The model generates text in the style of the Harry Potter saga.
|
| 4 |
+
|
| 5 |
+
## Model Information
|
| 6 |
+
|
| 7 |
+
- **Architecture**: GPT (Decoder-only Transformer)
|
| 8 |
+
- **Training**: The model was trained from scratch on Harry Potter books
|
| 9 |
+
- **Tokenizer**: SentencePiece (unigram model)
|
| 10 |
+
- **Parameters**: Approx. 124M (12 layers, 768 embedding dimensions, 12 attention heads)
|
| 11 |
+
|
| 12 |
+
## Usage
|
| 13 |
+
|
| 14 |
+
```python
|
| 15 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("CamiloVega/HarryPotterGPT-v2")
model = AutoModelForCausalLM.from_pretrained("CamiloVega/HarryPotterGPT-v2")

# Generate text
prompt = "Harry looked at Hermione and"
inputs = tokenizer(prompt, return_tensors="pt")
# do_sample=True is required for temperature/top_k to take effect;
# without it generate() decodes greedily and ignores both settings.
outputs = model.generate(
    **inputs,
    max_length=100,
    do_sample=True,
    temperature=0.7,
    top_k=50,
)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
## Examples
|
| 30 |
+
|
| 31 |
+
- "Harry looked at Hermione and saw that she was already searching through her books."
|
| 32 |
+
- "The castle of Hogwarts was illuminated by the moonlight, its towers reaching into the night sky."
|
| 33 |
+
- "Ron took out his wand and pointed it at the creature, his hand trembling slightly."
|
| 34 |
+
- "Dumbledore's eyes twinkled as he gazed at Harry over his half-moon spectacles."
|
| 35 |
+
|
| 36 |
+
## Limitations
|
| 37 |
+
|
| 38 |
+
This model was trained exclusively on Harry Potter books, so its knowledge is limited to that context. It works best with prompts related to the Harry Potter universe.
|
| 39 |
+
|
| 40 |
+
## Original Project
|
| 41 |
+
|
| 42 |
+
This model is part of an educational project on building language models from scratch. More details are available at https://github.com/CamiloVga/HarryPotterGPT
|
card-metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"language": [
|
| 3 |
+
"en"
|
| 4 |
+
],
|
| 5 |
+
"tags": [
|
| 6 |
+
"harry-potter",
|
| 7 |
+
"gpt",
|
| 8 |
+
"transformer",
|
| 9 |
+
"text-generation",
|
| 10 |
+
"english"
|
| 11 |
+
],
|
| 12 |
+
"license": "mit"
|
| 13 |
+
}
|
config.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "gpt2",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"GPT2LMHeadModel"
|
| 5 |
+
],
|
| 6 |
+
"vocab_size": 6450,
|
| 7 |
+
"n_positions": 1024,
|
| 8 |
+
"n_ctx": 1024,
|
| 9 |
+
"n_embd": 768,
|
| 10 |
+
"n_layer": 12,
|
| 11 |
+
"n_head": 12,
|
| 12 |
+
"n_inner": 3072,
|
| 13 |
+
"activation_function": "gelu",
|
| 14 |
+
"resid_pdrop": 0.1,
|
| 15 |
+
"embd_pdrop": 0.1,
|
| 16 |
+
"attn_pdrop": 0.1,
|
| 17 |
+
"layer_norm_epsilon": 1e-05,
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"bos_token_id": 2,
|
| 20 |
+
"eos_token_id": 3,
|
| 21 |
+
"pad_token_id": 4,
|
| 22 |
+
"use_cache": true,
|
| 23 |
+
"original_config": {
|
| 24 |
+
"vocab_size": 6450,
|
| 25 |
+
"context_length": 1024,
|
| 26 |
+
"embed_dim": 768,
|
| 27 |
+
"num_heads": 12,
|
| 28 |
+
"ff_dim": 3072,
|
| 29 |
+
"num_layers": 12,
|
| 30 |
+
"dropout": 0.1,
|
| 31 |
+
"attention_dropout": 0.1,
|
| 32 |
+
"embedding_dropout": 0.05,
|
| 33 |
+
"use_pre_layer_norm": true,
|
| 34 |
+
"layer_norm_epsilon": 1e-05,
|
| 35 |
+
"batch_size": 16,
|
| 36 |
+
"learning_rate": 3e-05,
|
| 37 |
+
"weight_decay": 0.01,
|
| 38 |
+
"num_epochs": 40,
|
| 39 |
+
"warmup_ratio": 0.15,
|
| 40 |
+
"final_lr_ratio": 0.05,
|
| 41 |
+
"gradient_clip_val": 1.0,
|
| 42 |
+
"gradient_accumulation_steps": 2,
|
| 43 |
+
"pad_token_id": 4
|
| 44 |
+
}
|
| 45 |
+
}
|
loading_helper.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
def load_harrypotter_gpt(model_path, device="auto"):
    """Load the HarryPotterGPT model and its tokenizer.

    Args:
        model_path: Model identifier or path handed to ``from_pretrained``
            for both the tokenizer and the model.
        device: Where to place the model. ``"auto"`` (the default) or
            ``None`` leaves the model wherever ``from_pretrained`` put it,
            so existing callers see no change. Any other value (for
            example ``"cpu"`` or ``"cuda"``) is passed to ``model.to``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # NOTE(review): the model card describes a SentencePiece tokenizer while
    # this loads GPT2Tokenizer (BPE) - confirm the two agree.
    tokenizer = GPT2Tokenizer.from_pretrained(model_path)

    # Fall back to the EOS token when no pad token is configured.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = GPT2LMHeadModel.from_pretrained(model_path)

    # Honor an explicit device request; previously this argument was ignored.
    if device is not None and device != "auto":
        model = model.to(device)

    return model, tokenizer
|
| 18 |
+
|
| 19 |
+
# Example usage:
|
| 20 |
+
# model, tokenizer = load_harrypotter_gpt("CamiloVega/HarryPotterGPT-v2")
|
merges.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GPT2 uses BPE merges, but we're using SentencePiece
|
| 2 |
+
# This file is just a placeholder
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:458044c8beedd43b17947c86c51316a5cb6577f2508b1551b8dd846f5dcb5a9f
|
| 3 |
+
size 433405902
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[BOS]",
|
| 3 |
+
"eos_token": "[EOS]",
|
| 4 |
+
"unk_token": "[UNK]",
|
| 5 |
+
"pad_token": "[PAD]"
|
| 6 |
+
}
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c6905e1f406045d0ba524de368128397ff2d649c519093b72d23c5d440922c9
|
| 3 |
+
size 350769
|
tokenizer.vocab
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "gpt2",
|
| 3 |
+
"bos_token": "[BOS]",
|
| 4 |
+
"eos_token": "[EOS]",
|
| 5 |
+
"unk_token": "[UNK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"do_lower_case": false,
|
| 8 |
+
"tokenizer_class": "GPT2Tokenizer"
|
| 9 |
+
}
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|